CMSIS DSP Software Library: arm_fir

Go to the documentation of this file.
00001 /* ----------------------------------------------------------------------   
00002 * Copyright (C) 2010 ARM Limited. All rights reserved.   
00003 *   
00004 * $Date:        15. July 2011  
00005 * $Revision:    V1.0.10  
00006 *   
00007 * Project:      CMSIS DSP Library   
00008 * Title:        arm_fir_f32.c   
00009 *   
00010 * Description:  Floating-point FIR filter processing function.   
00011 *   
00012 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
00013 *  
00014 * Version 1.0.10 2011/7/15 
00015 *    Big Endian support added and Merged M0 and M3/M4 Source code.  
00016 *   
00017 * Version 1.0.3 2010/11/29  
00018 *    Re-organized the CMSIS folders and updated documentation.   
00019 *    
00020 * Version 1.0.2 2010/11/11   
00021 *    Documentation updated.    
00022 *   
00023 * Version 1.0.1 2010/10/05    
00024 *    Production release and review comments incorporated.   
00025 *   
00026 * Version 1.0.0 2010/09/20    
00027 *    Production release and review comments incorporated.   
00028 *   
00029 * Version 0.0.5  2010/04/26    
00030 *    incorporated review comments and updated with latest CMSIS layer   
00031 *   
00032 * Version 0.0.3  2010/03/10    
00033 *    Initial version   
00034 * -------------------------------------------------------------------- */
00035 
00036 #include "arm_math.h"
00037 
/**
 * @brief Processing function for the floating-point FIR filter.
 *
 * For every output index k in [0, blockSize) the function computes
 *     pDst[k] = sum over j in [0, numTaps) of pCoeffs[j] * state[k + j]
 * where state[] is the buffer behind S->pState: the (numTaps - 1) samples kept
 * from the previous call followed by the new input samples copied from pSrc.
 * Before returning, the last (numTaps - 1) samples are moved to the start of
 * the state buffer so the next call continues the stream.
 *
 * @param[in]     *S         points to the FIR instance; numTaps, pCoeffs and pState are read from it.
 *                           The structure itself is not modified, but the buffer S->pState points to is.
 * @param[in]     *pSrc      points to the block of input samples (blockSize values are read).
 * @param[out]    *pDst      points to the block of output samples (blockSize values are written).
 * @param[in]     blockSize  number of samples to process in this call.
 * @return        none.
 *
 * @note The code writes state elements [numTaps - 1, numTaps + blockSize - 2], so the
 *       state buffer must hold at least numTaps + blockSize - 1 values.
 * @note NOTE(review): pCoeffs[0] is multiplied with the oldest sample of each window, so
 *       the coefficients are presumably expected in time-reversed order -- confirm
 *       against the documentation of the init function.
 * @note NOTE(review): numTaps == 0 is not handled: (numTaps - 1u) wraps around and the
 *       do/while multiply-accumulate loops always execute at least once. Callers
 *       presumably guarantee numTaps >= 1 -- confirm.
 */
00128 void arm_fir_f32(
00129   const arm_fir_instance_f32 * S,
00130   float32_t * pSrc,
00131   float32_t * pDst,
00132   uint32_t blockSize)
00133 {
00134 
00135   float32_t *pState = S->pState;                 /* Start of the current sample window in the state buffer */
00136   float32_t *pCoeffs = S->pCoeffs;               /* Coefficient pointer */
00137   float32_t *pStateCurnt;                        /* Write position for new samples in the state buffer */
00138   float32_t *px, *pb;                            /* Temporary pointers for state and coefficient buffers */
00139   uint32_t numTaps = S->numTaps;                 /* Number of filter coefficients in the filter */
00140   uint32_t i, tapCnt, blkCnt;                    /* Loop counters */
00141 
00142 
00143 #ifndef ARM_MATH_CM0
00144 
00145   /* Unrolled implementation, built when ARM_MATH_CM0 is not defined (Cortex-M4 and Cortex-M3) */
00146 
00147   float32_t acc0, acc1, acc2, acc3;              /* Accumulators, one per output of a group of 4 */
00148   float32_t x0, x1, x2, x3, c0;                  /* Temporary variables to hold state and coefficient values */
00149 
00150 
00151   /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
00152   /* pStateCurnt points to the location where the new input data should be written */
00153   pStateCurnt = &(S->pState[(numTaps - 1u)]);
00154 
00155   /* Apply loop unrolling and compute 4 output values simultaneously.   
00156    * With c[] = pCoeffs and s[] = the state samples starting at the current pState,   
00157    * the variables acc0 ... acc3 hold the four output values that are being computed:   
00158    *    acc0 =  c[0] * s[0] + c[1] * s[1] + c[2] * s[2] +...+ c[numTaps-1] * s[numTaps-1]   
00159    *    acc1 =  c[0] * s[1] + c[1] * s[2] + c[2] * s[3] +...+ c[numTaps-1] * s[numTaps]   
00160    *    acc2 =  c[0] * s[2] + c[1] * s[3] + c[2] * s[4] +...+ c[numTaps-1] * s[numTaps+1]   
00161    *    acc3 =  c[0] * s[3] + c[1] * s[4] + c[2] * s[5] +...+ c[numTaps-1] * s[numTaps+2]   
00162    */
00163   blkCnt = blockSize >> 2;
00164 
00165   /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.   
00166    ** A second loop below computes the remaining 0 to 3 samples (blockSize % 4). */
00167   while(blkCnt > 0u)
00168   {
00169     /* Copy four new input samples into the state buffer */
00170     *pStateCurnt++ = *pSrc++;
00171     *pStateCurnt++ = *pSrc++;
00172     *pStateCurnt++ = *pSrc++;
00173     *pStateCurnt++ = *pSrc++;
00174 
00175     /* Set all accumulators to zero */
00176     acc0 = 0.0f;
00177     acc1 = 0.0f;
00178     acc2 = 0.0f;
00179     acc3 = 0.0f;
00180 
00181     /* Initialize state pointer to the oldest sample of this group's window */
00182     px = pState;
00183 
00184     /* Initialize coeff pointer to the first coefficient */
00185     pb = (pCoeffs);
00186 
00187     /* Preload the first three samples of the window: s[0], s[1], s[2] */
00188     x0 = *px++;
00189     x1 = *px++;
00190     x2 = *px++;
00191 
00192     /* Loop unrolling.  Process 4 taps at a time. */
00193     tapCnt = numTaps >> 2u;
00194 
00195     /* Loop over the taps in groups of 4 (numTaps / 4 passes).   
00196      ** In pass m the coefficients c[4m] ... c[4m+3] are consumed; leftover taps are handled below. */
00197     while(tapCnt > 0u)
00198     {
00199       /* Read coefficient c[4m] */
00200       c0 = *(pb++);
00201 
00202       /* Read sample s[4m+3] */
00203       x3 = *(px++);
00204 
00205       /* acc0 +=  c[4m] * s[4m] */
00206       acc0 += x0 * c0;
00207 
00208       /* acc1 +=  c[4m] * s[4m+1] */
00209       acc1 += x1 * c0;
00210 
00211       /* acc2 +=  c[4m] * s[4m+2] */
00212       acc2 += x2 * c0;
00213 
00214       /* acc3 +=  c[4m] * s[4m+3] */
00215       acc3 += x3 * c0;
00216 
00217       /* Read coefficient c[4m+1] */
00218       c0 = *(pb++);
00219 
00220       /* Read sample s[4m+4]; x0 is free because s[4m] is no longer needed */
00221       x0 = *(px++);
00222 
00223       /* Multiply-accumulate c[4m+1] with s[4m+1] ... s[4m+4] */
00224       acc0 += x1 * c0;
00225       acc1 += x2 * c0;
00226       acc2 += x3 * c0;
00227       acc3 += x0 * c0;
00228 
00229       /* Read coefficient c[4m+2] */
00230       c0 = *(pb++);
00231 
00232       /* Read sample s[4m+5] */
00233       x1 = *(px++);
00234 
00235       /* Multiply-accumulate c[4m+2] with s[4m+2] ... s[4m+5] */
00236       acc0 += x2 * c0;
00237       acc1 += x3 * c0;
00238       acc2 += x0 * c0;
00239       acc3 += x1 * c0;
00240 
00241       /* Read coefficient c[4m+3] */
00242       c0 = *(pb++);
00243 
00244       /* Read sample s[4m+6] */
00245       x2 = *(px++);
00246 
00247       /* Multiply-accumulate c[4m+3] with s[4m+3] ... s[4m+6] */
00248       acc0 += x3 * c0;
00249       acc1 += x0 * c0;
00250       acc2 += x1 * c0;
00251       acc3 += x2 * c0;
00252       /* x0, x1, x2 now hold s[4m+4], s[4m+5], s[4m+6], i.e. the preload for the next pass */
00253       tapCnt--;
00254     }
00255 
00256     /* If the filter length is not a multiple of 4, compute the remaining filter taps one at a time */
00257     tapCnt = numTaps % 0x4u;
00258 
00259     while(tapCnt > 0u)
00260     {
00261       /* Read the next coefficient */
00262       c0 = *(pb++);
00263 
00264       /* Fetch the one additional state sample needed by acc3 */
00265       x3 = *(px++);
00266 
00267       /* Perform the multiply-accumulates */
00268       acc0 += x0 * c0;
00269       acc1 += x1 * c0;
00270       acc2 += x2 * c0;
00271       acc3 += x3 * c0;
00272 
00273       /* Slide the three preloaded samples forward by one for the next tap */
00274       x0 = x1;
00275       x1 = x2;
00276       x2 = x3;
00277 
00278       /* Decrement the loop counter */
00279       tapCnt--;
00280     }
00281 
00282     /* Advance the state pointer by 4 to process the next group of 4 samples */
00283     pState = pState + 4;
00284 
00285     /* Store the results held in the 4 accumulators in the destination buffer. */
00286     *pDst++ = acc0;
00287     *pDst++ = acc1;
00288     *pDst++ = acc2;
00289     *pDst++ = acc3;
00290 
00291     blkCnt--;
00292   }
00293 
00294   /* If the blockSize is not a multiple of 4, compute any remaining output samples here.   
00295    ** No loop unrolling is used. */
00296   blkCnt = blockSize % 0x4u;
00297 
00298   while(blkCnt > 0u)
00299   {
00300     /* Copy one sample at a time into state buffer */
00301     *pStateCurnt++ = *pSrc++;
00302 
00303     /* Set the accumulator to zero */
00304     acc0 = 0.0f;
00305 
00306     /* Initialize state pointer */
00307     px = pState;
00308 
00309     /* Initialize Coefficient pointer */
00310     pb = (pCoeffs);
00311 
00312     i = numTaps;
00313 
00314     /* Perform the multiply-accumulates; do/while, so the body runs at least once */
00315     do
00316     {
00317       acc0 += *px++ * *pb++;
00318       i--;
00319 
00320     } while(i > 0u);
00321 
00322     /* The result is stored in the destination buffer. */
00323     *pDst++ = acc0;
00324 
00325     /* Advance state pointer by 1 for the next sample */
00326     pState = pState + 1;
00327 
00328     blkCnt--;
00329   }
00330 
00331   /* Processing is complete.   
00332    ** Now copy the last numTaps - 1 samples to the start of the state buffer.   
00333    ** This prepares the state buffer for the next function call.   
00334    ** pState has advanced by blockSize in total, so the source is at or ahead of the destination;   
00335    ** because the copy runs forward, overlapping ranges are handled correctly. */
00336   pStateCurnt = S->pState;
00337 
00338   tapCnt = (numTaps - 1u) >> 2u;
00339 
00340   /* Copy data, 4 samples per pass */
00341   while(tapCnt > 0u)
00342   {
00343     *pStateCurnt++ = *pState++;
00344     *pStateCurnt++ = *pState++;
00345     *pStateCurnt++ = *pState++;
00346     *pStateCurnt++ = *pState++;
00347 
00348     /* Decrement the loop counter */
00349     tapCnt--;
00350   }
00351 
00352   /* Calculate remaining number of copies */
00353   tapCnt = (numTaps - 1u) % 0x4u;
00354 
00355   /* Copy the remaining float32_t samples */
00356   while(tapCnt > 0u)
00357   {
00358     *pStateCurnt++ = *pState++;
00359 
00360     /* Decrement the loop counter */
00361     tapCnt--;
00362   }
00363 
00364 #else
00365 
00366   /* One-output-at-a-time implementation, built when ARM_MATH_CM0 is defined (Cortex-M0) */
00367 
00368   float32_t acc;
00369 
00370   /* S->pState points to state array which contains previous frame (numTaps - 1) samples */
00371   /* pStateCurnt points to the location where the new input data should be written */
00372   pStateCurnt = &(S->pState[(numTaps - 1u)]);
00373 
00374   /* Initialize blkCnt with blockSize */
00375   blkCnt = blockSize;
00376 
00377   while(blkCnt > 0u)
00378   {
00379     /* Copy one sample at a time into state buffer */
00380     *pStateCurnt++ = *pSrc++;
00381 
00382     /* Set the accumulator to zero */
00383     acc = 0.0f;
00384 
00385     /* Initialize state pointer */
00386     px = pState;
00387 
00388     /* Initialize Coefficient pointer */
00389     pb = pCoeffs;
00390 
00391     i = numTaps;
00392 
00393     /* Perform the multiply-accumulates; do/while, so the body runs at least once */
00394     do
00395     {
00396       /* acc =  pCoeffs[0] * pState[0] + pCoeffs[1] * pState[1] +...+ pCoeffs[numTaps-1] * pState[numTaps-1] */
00397       acc += *px++ * *pb++;
00398       i--;
00399 
00400     } while(i > 0u);
00401 
00402     /* The result is stored in the destination buffer. */
00403     *pDst++ = acc;
00404 
00405     /* Advance state pointer by 1 for the next sample */
00406     pState = pState + 1;
00407 
00408     blkCnt--;
00409   }
00410 
00411   /* Processing is complete.        
00412    ** Now copy the last numTaps - 1 samples to the start of the state buffer.      
00413    ** This prepares the state buffer for the next function call.      
00414    ** The copy runs forward with the source at or ahead of the destination, so overlap is safe. */
00415   /* Points to the start of the state buffer */
00416   pStateCurnt = S->pState;
00417 
00418   /* Copy numTaps - 1 values */
00419   tapCnt = numTaps - 1u;
00420 
00421   /* Copy data */
00422   while(tapCnt > 0u)
00423   {
00424     *pStateCurnt++ = *pState++;
00425 
00426     /* Decrement the loop counter */
00427     tapCnt--;
00428   }
00429 
00430 #endif /*   #ifndef ARM_MATH_CM0 */
00431 
00432 }
00433