From d3387fb952dcdaf09c2978552bf0119a9d71ecfb Mon Sep 17 00:00:00 2001 From: Hardik Sharma Date: Tue, 28 Apr 2026 08:45:32 -0700 Subject: [PATCH 1/2] Add C++ unit tests for cadence::quantized_conv1d_ncl.out and quantized_conv1d_nlc.out (#19161) Summary: Adds C++ unit tests for cadence::quantized_conv1d_ncl.out and cadence::quantized_conv1d_nlc.out in test_op_quantized_conv1d_ncl.cpp. Also modifies op_quantized_conv1d_ncl.cpp and op_quantized_conv1d_nlc.cpp in the HiFi backend to fix correctness issues surfaced by the new tests. Reviewed By: mcremon-meta, zonglinpeng Differential Revision: D97886683 --- .../hifi/operators/op_quantized_conv1d_ncl.cpp | 11 +++++++---- .../hifi/operators/op_quantized_conv1d_nlc.cpp | 6 +++--- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/backends/cadence/hifi/operators/op_quantized_conv1d_ncl.cpp b/backends/cadence/hifi/operators/op_quantized_conv1d_ncl.cpp index ccc81a35aba..a0bed1e0b70 100644 --- a/backends/cadence/hifi/operators/op_quantized_conv1d_ncl.cpp +++ b/backends/cadence/hifi/operators/op_quantized_conv1d_ncl.cpp @@ -240,7 +240,10 @@ void xa_opt_quantized_conv1d_ncl_asym8uxsym8u( WORD32 x_stride = stride[0]; WORD32 x_padding = padding[0]; WORD32 input_zero_bias = -in_zero_point; - WORD32 out_multiplier32 = bias_scale * (1. / output_scale) * 2147483648; + const float eff_scale = bias_scale * (1.0f / output_scale); + WORD32 out_multiplier32 = (eff_scale >= 1.0f) + ? static_cast<WORD32>(2147483647) + : static_cast<WORD32>(eff_scale * 2147483648.0f); WORD32 out_shift32 = 0; WORD32 kernel_zero_bias = -weight_zero_point; @@ -419,9 +422,9 @@ void quantized_conv1d_ncl_per_tensor_out( out); } } else if (dtype == ScalarType::Byte) { - // HiFi nnlib conv1d_std kernel does not support depthwise (groups > 1). - // Fall back to generic implementation. - if (groups > 1) { + // HiFi nnlib conv1d_std kernel does not support depthwise (groups > 1) + // or stride > 1. Fall back to generic implementation. 
+ if (groups > 1 || stride[0] > 1) { impl::generic::native::quantized_conv1d_ncl_per_tensor_out( ctx, input, diff --git a/backends/cadence/hifi/operators/op_quantized_conv1d_nlc.cpp b/backends/cadence/hifi/operators/op_quantized_conv1d_nlc.cpp index 2a11dbf358d..10c00bf536b 100644 --- a/backends/cadence/hifi/operators/op_quantized_conv1d_nlc.cpp +++ b/backends/cadence/hifi/operators/op_quantized_conv1d_nlc.cpp @@ -298,9 +298,9 @@ void quantized_conv1d_nlc_per_tensor_out( out); } } else if (dtype == ScalarType::Byte) { - // HiFi nnlib conv1d_std kernel does not support depthwise (groups > 1). - // Fall back to generic implementation. - if (groups > 1) { + // HiFi nnlib conv1d_std kernel does not support depthwise (groups > 1) + // or stride > 1. Fall back to generic implementation. + if (groups > 1 || stride[0] > 1) { impl::generic::native::quantized_conv1d_nlc_per_tensor_out( ctx, input, From 3f3d68405ea65d40bf8bc94f78ba164e131675e1 Mon Sep 17 00:00:00 2001 From: Hardik Sharma Date: Tue, 28 Apr 2026 08:45:32 -0700 Subject: [PATCH 2/2] Add C++ unit tests for cadence::fully_connected.out (#19165) Summary: Adds C++ unit tests for cadence::fully_connected.out in test_op_fully_connected.cpp. Tests verify correct dense layer output (y = Wx + b) across various input sizes, output feature counts, and with and without bias. 
Reviewed By: zonglinpeng, mcremon-meta Differential Revision: D97889888 --- backends/cadence/generic/operators/op_fully_connected.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/backends/cadence/generic/operators/op_fully_connected.cpp b/backends/cadence/generic/operators/op_fully_connected.cpp index f1e53ad5f76..36befc52102 100644 --- a/backends/cadence/generic/operators/op_fully_connected.cpp +++ b/backends/cadence/generic/operators/op_fully_connected.cpp @@ -27,7 +27,8 @@ void linear( Tensor& output) { const float* __restrict__ input_data = input.const_data_ptr<float>(); const float* __restrict__ weight_data = weight.const_data_ptr<float>(); - const float* __restrict__ bias_data = bias.value().const_data_ptr<float>(); + const float* __restrict__ bias_data = + bias.has_value() ? bias.value().const_data_ptr<float>() : nullptr; float* __restrict__ output_data = output.mutable_data_ptr<float>(); // input comes in shape [batch_size, in_dim] @@ -43,7 +44,7 @@ void linear( for (int i = 0; i < leading_dims; ++i) { for (int j = 0; j < M; ++j) { - float sum = bias_data[j]; + float sum = bias_data != nullptr ? bias_data[j] : 0.0f; for (int k = 0; k < N; ++k) { sum += input_data[i * N + k] * weight_data[j * N + k]; }