00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 15. July 2011 00005 * $Revision: V1.0.10 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_fir_f32.c 00009 * 00010 * Description: Floating-point FIR filter processing function. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0 00013 * 00014 * Version 1.0.10 2011/7/15 00015 * Big Endian support added and Merged M0 and M3/M4 Source code. 00016 * 00017 * Version 1.0.3 2010/11/29 00018 * Re-organized the CMSIS folders and updated documentation. 00019 * 00020 * Version 1.0.2 2010/11/11 00021 * Documentation updated. 00022 * 00023 * Version 1.0.1 2010/10/05 00024 * Production release and review comments incorporated. 00025 * 00026 * Version 1.0.0 2010/09/20 00027 * Production release and review comments incorporated. 00028 * 00029 * Version 0.0.5 2010/04/26 00030 * incorporated review comments and updated with latest CMSIS layer 00031 * 00032 * Version 0.0.3 2010/03/10 00033 * Initial version 00034 * -------------------------------------------------------------------- */ 00035 00036 #include "arm_math.h" 00037 00128 void arm_fir_f32( 00129 const arm_fir_instance_f32 * S, 00130 float32_t * pSrc, 00131 float32_t * pDst, 00132 uint32_t blockSize) 00133 { 00134 00135 float32_t *pState = S->pState; /* State pointer */ 00136 float32_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00137 float32_t *pStateCurnt; /* Points to the current sample of the state */ 00138 float32_t *px, *pb; /* Temporary pointers for state and coefficient buffers */ 00139 uint32_t numTaps = S->numTaps; /* Number of filter coefficients in the filter */ 00140 uint32_t i, tapCnt, blkCnt; /* Loop counters */ 00141 00142 00143 #ifndef ARM_MATH_CM0 00144 00145 /* Run the below code for Cortex-M4 and Cortex-M3 */ 00146 00147 float32_t acc0, acc1, acc2, acc3; /* Accumulators */ 00148 float32_t x0, x1, x2, x3, c0; /* Temporary variables to hold state and coefficient values */ 00149 00150 00151 /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ 00152 /* pStateCurnt points to the location where the new input data should be written */ 00153 pStateCurnt = &(S->pState[(numTaps - 1u)]); 00154 00155 /* Apply loop unrolling and compute 4 output values simultaneously. 00156 * The variables acc0 ... acc3 hold output values that are being computed: 00157 * 00158 * acc0 = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] 00159 * acc1 = b[numTaps-1] * x[n-numTaps] + b[numTaps-2] * x[n-numTaps-1] + b[numTaps-3] * x[n-numTaps-2] +...+ b[0] * x[1] 00160 * acc2 = b[numTaps-1] * x[n-numTaps+1] + b[numTaps-2] * x[n-numTaps] + b[numTaps-3] * x[n-numTaps-1] +...+ b[0] * x[2] 00161 * acc3 = b[numTaps-1] * x[n-numTaps+2] + b[numTaps-2] * x[n-numTaps+1] + b[numTaps-3] * x[n-numTaps] +...+ b[0] * x[3] 00162 */ 00163 blkCnt = blockSize >> 2; 00164 00165 /* First part of the processing with loop unrolling. Compute 4 outputs at a time. 00166 ** a second loop below computes the remaining 1 to 3 samples. */ 00167 while(blkCnt > 0u) 00168 { 00169 /* Copy four new input samples into the state buffer */ 00170 *pStateCurnt++ = *pSrc++; 00171 *pStateCurnt++ = *pSrc++; 00172 *pStateCurnt++ = *pSrc++; 00173 *pStateCurnt++ = *pSrc++; 00174 00175 /* Set all accumulators to zero */ 00176 acc0 = 0.0f; 00177 acc1 = 0.0f; 00178 acc2 = 0.0f; 00179 acc3 = 0.0f; 00180 00181 /* Initialize state pointer */ 00182 px = pState; 00183 00184 /* Initialize coeff pointer */ 00185 pb = (pCoeffs); 00186 00187 /* Read the first three samples from the state buffer: x[n-numTaps], x[n-numTaps-1], x[n-numTaps-2] */ 00188 x0 = *px++; 00189 x1 = *px++; 00190 x2 = *px++; 00191 00192 /* Loop unrolling. Process 4 taps at a time. */ 00193 tapCnt = numTaps >> 2u; 00194 00195 /* Loop over the number of taps. Unroll by a factor of 4. 00196 ** Repeat until we've computed numTaps-4 coefficients. */ 00197 while(tapCnt > 0u) 00198 { 00199 /* Read the b[numTaps-1] coefficient */ 00200 c0 = *(pb++); 00201 00202 /* Read x[n-numTaps-3] sample */ 00203 x3 = *(px++); 00204 00205 /* acc0 += b[numTaps-1] * x[n-numTaps] */ 00206 acc0 += x0 * c0; 00207 00208 /* acc1 += b[numTaps-1] * x[n-numTaps-1] */ 00209 acc1 += x1 * c0; 00210 00211 /* acc2 += b[numTaps-1] * x[n-numTaps-2] */ 00212 acc2 += x2 * c0; 00213 00214 /* acc3 += b[numTaps-1] * x[n-numTaps-3] */ 00215 acc3 += x3 * c0; 00216 00217 /* Read the b[numTaps-2] coefficient */ 00218 c0 = *(pb++); 00219 00220 /* Read x[n-numTaps-4] sample */ 00221 x0 = *(px++); 00222 00223 /* Perform the multiply-accumulate */ 00224 acc0 += x1 * c0; 00225 acc1 += x2 * c0; 00226 acc2 += x3 * c0; 00227 acc3 += x0 * c0; 00228 00229 /* Read the b[numTaps-3] coefficient */ 00230 c0 = *(pb++); 00231 00232 /* Read x[n-numTaps-5] sample */ 00233 x1 = *(px++); 00234 00235 /* Perform the multiply-accumulates */ 00236 acc0 += x2 * c0; 00237 acc1 += x3 * c0; 00238 acc2 += x0 * c0; 00239 acc3 += x1 * c0; 00240 00241 /* Read the b[numTaps-4] coefficient */ 00242 c0 = *(pb++); 00243 00244 /* Read x[n-numTaps-6] sample */ 00245 x2 = *(px++); 00246 00247 /* Perform the multiply-accumulates */ 00248 acc0 += x3 * c0; 00249 acc1 += x0 * c0; 00250 acc2 += x1 * c0; 00251 acc3 += x2 * c0; 00252 00253 tapCnt--; 00254 } 00255 00256 /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 00257 tapCnt = numTaps % 0x4u; 00258 00259 while(tapCnt > 0u) 00260 { 00261 /* Read coefficients */ 00262 c0 = *(pb++); 00263 00264 /* Fetch 1 state variable */ 00265 x3 = *(px++); 00266 00267 /* Perform the multiply-accumulates */ 00268 acc0 += x0 * c0; 00269 acc1 += x1 * c0; 00270 acc2 += x2 * c0; 00271 acc3 += x3 * c0; 00272 00273 /* Reuse the present sample states for next sample */ 00274 x0 = x1; 00275 x1 = x2; 00276 x2 = x3; 00277 00278 /* Decrement the loop counter */ 00279 tapCnt--; 00280 } 00281 00282 /* Advance the state pointer by 4 to process the next group of 4 samples */ 00283 pState = pState + 4; 00284 00285 /* The results in the 4 accumulators, store in the destination buffer. */ 00286 *pDst++ = acc0; 00287 *pDst++ = acc1; 00288 *pDst++ = acc2; 00289 *pDst++ = acc3; 00290 00291 blkCnt--; 00292 } 00293 00294 /* If the blockSize is not a multiple of 4, compute any remaining output samples here. 00295 ** No loop unrolling is used. */ 00296 blkCnt = blockSize % 0x4u; 00297 00298 while(blkCnt > 0u) 00299 { 00300 /* Copy one sample at a time into state buffer */ 00301 *pStateCurnt++ = *pSrc++; 00302 00303 /* Set the accumulator to zero */ 00304 acc0 = 0.0f; 00305 00306 /* Initialize state pointer */ 00307 px = pState; 00308 00309 /* Initialize Coefficient pointer */ 00310 pb = (pCoeffs); 00311 00312 i = numTaps; 00313 00314 /* Perform the multiply-accumulates */ 00315 do 00316 { 00317 acc0 += *px++ * *pb++; 00318 i--; 00319 00320 } while(i > 0u); 00321 00322 /* The result is store in the destination buffer. */ 00323 *pDst++ = acc0; 00324 00325 /* Advance state pointer by 1 for the next sample */ 00326 pState = pState + 1; 00327 00328 blkCnt--; 00329 } 00330 00331 /* Processing is complete. 00332 ** Now copy the last numTaps - 1 samples to the satrt of the state buffer. 00333 ** This prepares the state buffer for the next function call. */ 00334 00335 /* Points to the start of the state buffer */ 00336 pStateCurnt = S->pState; 00337 00338 tapCnt = (numTaps - 1u) >> 2u; 00339 00340 /* copy data */ 00341 while(tapCnt > 0u) 00342 { 00343 *pStateCurnt++ = *pState++; 00344 *pStateCurnt++ = *pState++; 00345 *pStateCurnt++ = *pState++; 00346 *pStateCurnt++ = *pState++; 00347 00348 /* Decrement the loop counter */ 00349 tapCnt--; 00350 } 00351 00352 /* Calculate remaining number of copies */ 00353 tapCnt = (numTaps - 1u) % 0x4u; 00354 00355 /* Copy the remaining q31_t data */ 00356 while(tapCnt > 0u) 00357 { 00358 *pStateCurnt++ = *pState++; 00359 00360 /* Decrement the loop counter */ 00361 tapCnt--; 00362 } 00363 00364 #else 00365 00366 /* Run the below code for Cortex-M0 */ 00367 00368 float32_t acc; 00369 00370 /* S->pState points to state array which contains previous frame (numTaps - 1) samples */ 00371 /* pStateCurnt points to the location where the new input data should be written */ 00372 pStateCurnt = &(S->pState[(numTaps - 1u)]); 00373 00374 /* Initialize blkCnt with blockSize */ 00375 blkCnt = blockSize; 00376 00377 while(blkCnt > 0u) 00378 { 00379 /* Copy one sample at a time into state buffer */ 00380 *pStateCurnt++ = *pSrc++; 00381 00382 /* Set the accumulator to zero */ 00383 acc = 0.0f; 00384 00385 /* Initialize state pointer */ 00386 px = pState; 00387 00388 /* Initialize Coefficient pointer */ 00389 pb = pCoeffs; 00390 00391 i = numTaps; 00392 00393 /* Perform the multiply-accumulates */ 00394 do 00395 { 00396 /* acc = b[numTaps-1] * x[n-numTaps-1] + b[numTaps-2] * x[n-numTaps-2] + b[numTaps-3] * x[n-numTaps-3] +...+ b[0] * x[0] */ 00397 acc += *px++ * *pb++; 00398 i--; 00399 00400 } while(i > 0u); 00401 00402 /* The result is store in the destination buffer. */ 00403 *pDst++ = acc; 00404 00405 /* Advance state pointer by 1 for the next sample */ 00406 pState = pState + 1; 00407 00408 blkCnt--; 00409 } 00410 00411 /* Processing is complete. 00412 ** Now copy the last numTaps - 1 samples to the starting of the state buffer. 00413 ** This prepares the state buffer for the next function call. */ 00414 00415 /* Points to the start of the state buffer */ 00416 pStateCurnt = S->pState; 00417 00418 /* Copy numTaps number of values */ 00419 tapCnt = numTaps - 1u; 00420 00421 /* Copy data */ 00422 while(tapCnt > 0u) 00423 { 00424 *pStateCurnt++ = *pState++; 00425 00426 /* Decrement the loop counter */ 00427 tapCnt--; 00428 } 00429 00430 #endif /* #ifndef ARM_MATH_CM0 */ 00431 00432 } 00433