Squashed 'tmk_core/' changes from 7967731..b9e0ea0

b9e0ea0 Merge commit '7fa9d8bdea3773d1195b04d98fcf27cf48ddd81d' as 'tool/mbed/mbed-sdk'
7fa9d8b Squashed 'tool/mbed/mbed-sdk/' content from commit 7c21ce5

git-subtree-dir: tmk_core
git-subtree-split: b9e0ea08cb940de20b3610ecdda18e9d8cd7c552
This commit is contained in:
Jun Wako
2015-04-24 16:26:14 +09:00
parent a20ef7052c
commit 1fe4406f37
4198 changed files with 2016457 additions and 0 deletions

View File

@ -0,0 +1,242 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2013 ARM Limited. All rights reserved.
*
* $Date: 17. January 2013
* $Revision: V1.4.1
*
* Project: CMSIS DSP Library
* Title: arm_bitreversal.c
*
* Description: This file has common tables like Bitreverse, reciprocal etc which are used across different functions
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
#include "arm_common_tables.h"
/*
* @brief In-place bit reversal function.
* @param[in, out] *pSrc points to the in-place buffer of floating-point data type.
* @param[in] fftSize length of the FFT.
* @param[in] bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table.
* @param[in] *pBitRevTab points to the bit reversal table.
* @return none.
*/
void arm_bitreversal_f32(
float32_t * pSrc,
uint16_t fftSize,
uint16_t bitRevFactor,
uint16_t * pBitRevTab)
{
uint16_t fftLenBy2, fftLenBy2p1;
uint16_t i, j;
float32_t in;
/* Initializations */
j = 0u;
fftLenBy2 = fftSize >> 1u;
fftLenBy2p1 = (fftSize >> 1u) + 1u;
/* Bit Reversal Implementation */
for (i = 0u; i <= (fftLenBy2 - 2u); i += 2u)
{
if(i < j)
{
/* pSrc[i] <-> pSrc[j]; */
in = pSrc[2u * i];
pSrc[2u * i] = pSrc[2u * j];
pSrc[2u * j] = in;
/* pSrc[i+1u] <-> pSrc[j+1u] */
in = pSrc[(2u * i) + 1u];
pSrc[(2u * i) + 1u] = pSrc[(2u * j) + 1u];
pSrc[(2u * j) + 1u] = in;
/* pSrc[i+fftLenBy2p1] <-> pSrc[j+fftLenBy2p1] */
in = pSrc[2u * (i + fftLenBy2p1)];
pSrc[2u * (i + fftLenBy2p1)] = pSrc[2u * (j + fftLenBy2p1)];
pSrc[2u * (j + fftLenBy2p1)] = in;
/* pSrc[i+fftLenBy2p1+1u] <-> pSrc[j+fftLenBy2p1+1u] */
in = pSrc[(2u * (i + fftLenBy2p1)) + 1u];
pSrc[(2u * (i + fftLenBy2p1)) + 1u] =
pSrc[(2u * (j + fftLenBy2p1)) + 1u];
pSrc[(2u * (j + fftLenBy2p1)) + 1u] = in;
}
/* pSrc[i+1u] <-> pSrc[j+1u] */
in = pSrc[2u * (i + 1u)];
pSrc[2u * (i + 1u)] = pSrc[2u * (j + fftLenBy2)];
pSrc[2u * (j + fftLenBy2)] = in;
/* pSrc[i+2u] <-> pSrc[j+2u] */
in = pSrc[(2u * (i + 1u)) + 1u];
pSrc[(2u * (i + 1u)) + 1u] = pSrc[(2u * (j + fftLenBy2)) + 1u];
pSrc[(2u * (j + fftLenBy2)) + 1u] = in;
/* Reading the index for the bit reversal */
j = *pBitRevTab;
/* Updating the bit reversal index depending on the fft length */
pBitRevTab += bitRevFactor;
}
}
/*
* @brief In-place bit reversal function.
* @param[in, out] *pSrc points to the in-place buffer of Q31 data type.
* @param[in] fftLen length of the FFT.
* @param[in] bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table
* @param[in] *pBitRevTab points to bit reversal table.
* @return none.
*/
void arm_bitreversal_q31(
q31_t * pSrc,
uint32_t fftLen,
uint16_t bitRevFactor,
uint16_t * pBitRevTable)
{
uint32_t fftLenBy2, fftLenBy2p1, i, j;
q31_t in;
/* Initializations */
j = 0u;
fftLenBy2 = fftLen / 2u;
fftLenBy2p1 = (fftLen / 2u) + 1u;
/* Bit Reversal Implementation */
for (i = 0u; i <= (fftLenBy2 - 2u); i += 2u)
{
if(i < j)
{
/* pSrc[i] <-> pSrc[j]; */
in = pSrc[2u * i];
pSrc[2u * i] = pSrc[2u * j];
pSrc[2u * j] = in;
/* pSrc[i+1u] <-> pSrc[j+1u] */
in = pSrc[(2u * i) + 1u];
pSrc[(2u * i) + 1u] = pSrc[(2u * j) + 1u];
pSrc[(2u * j) + 1u] = in;
/* pSrc[i+fftLenBy2p1] <-> pSrc[j+fftLenBy2p1] */
in = pSrc[2u * (i + fftLenBy2p1)];
pSrc[2u * (i + fftLenBy2p1)] = pSrc[2u * (j + fftLenBy2p1)];
pSrc[2u * (j + fftLenBy2p1)] = in;
/* pSrc[i+fftLenBy2p1+1u] <-> pSrc[j+fftLenBy2p1+1u] */
in = pSrc[(2u * (i + fftLenBy2p1)) + 1u];
pSrc[(2u * (i + fftLenBy2p1)) + 1u] =
pSrc[(2u * (j + fftLenBy2p1)) + 1u];
pSrc[(2u * (j + fftLenBy2p1)) + 1u] = in;
}
/* pSrc[i+1u] <-> pSrc[j+1u] */
in = pSrc[2u * (i + 1u)];
pSrc[2u * (i + 1u)] = pSrc[2u * (j + fftLenBy2)];
pSrc[2u * (j + fftLenBy2)] = in;
/* pSrc[i+2u] <-> pSrc[j+2u] */
in = pSrc[(2u * (i + 1u)) + 1u];
pSrc[(2u * (i + 1u)) + 1u] = pSrc[(2u * (j + fftLenBy2)) + 1u];
pSrc[(2u * (j + fftLenBy2)) + 1u] = in;
/* Reading the index for the bit reversal */
j = *pBitRevTable;
/* Updating the bit reversal index depending on the fft length */
pBitRevTable += bitRevFactor;
}
}
/*
* @brief In-place bit reversal function.
* @param[in, out] *pSrc points to the in-place buffer of Q15 data type.
* @param[in] fftLen length of the FFT.
* @param[in] bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table
* @param[in] *pBitRevTab points to bit reversal table.
* @return none.
*/
void arm_bitreversal_q15(
q15_t * pSrc16,
uint32_t fftLen,
uint16_t bitRevFactor,
uint16_t * pBitRevTab)
{
q31_t *pSrc = (q31_t *) pSrc16;
q31_t in;
uint32_t fftLenBy2, fftLenBy2p1;
uint32_t i, j;
/* Initializations */
j = 0u;
fftLenBy2 = fftLen / 2u;
fftLenBy2p1 = (fftLen / 2u) + 1u;
/* Bit Reversal Implementation */
for (i = 0u; i <= (fftLenBy2 - 2u); i += 2u)
{
if(i < j)
{
/* pSrc[i] <-> pSrc[j]; */
/* pSrc[i+1u] <-> pSrc[j+1u] */
in = pSrc[i];
pSrc[i] = pSrc[j];
pSrc[j] = in;
/* pSrc[i + fftLenBy2p1] <-> pSrc[j + fftLenBy2p1]; */
/* pSrc[i + fftLenBy2p1+1u] <-> pSrc[j + fftLenBy2p1+1u] */
in = pSrc[i + fftLenBy2p1];
pSrc[i + fftLenBy2p1] = pSrc[j + fftLenBy2p1];
pSrc[j + fftLenBy2p1] = in;
}
/* pSrc[i+1u] <-> pSrc[j+fftLenBy2]; */
/* pSrc[i+2] <-> pSrc[j+fftLenBy2+1u] */
in = pSrc[i + 1u];
pSrc[i + 1u] = pSrc[j + fftLenBy2];
pSrc[j + fftLenBy2] = in;
/* Reading the index for the bit reversal */
j = *pBitRevTab;
/* Updating the bit reversal index depending on the fft length */
pBitRevTab += bitRevFactor;
}
}

View File

@ -0,0 +1,148 @@
;/* ----------------------------------------------------------------------
;* Copyright (C) 2010-2013 ARM Limited. All rights reserved.
;*
;* $Date: 17. January 2013
;* $Revision: V1.4.1
;*
;* Project: CMSIS DSP Library
;* Title: arm_bitreversal2.S
;*
;* Description: This is the arm_bitreversal_32 function done in
;* assembly for maximum speed. This function is called
;* after doing an fft to reorder the output. The function
;* is loop unrolled by 2.
;*
;* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
;*
;* Redistribution and use in source and binary forms, with or without
;* modification, are permitted provided that the following conditions
;* are met:
;* - Redistributions of source code must retain the above copyright
;* notice, this list of conditions and the following disclaimer.
;* - Redistributions in binary form must reproduce the above copyright
;* notice, this list of conditions and the following disclaimer in
;* the documentation and/or other materials provided with the
;* distribution.
;* - Neither the name of ARM LIMITED nor the names of its contributors
;* may be used to endorse or promote products derived from this
;* software without specific prior written permission.
;*
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
;* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;* POSSIBILITY OF SUCH DAMAGE.
;* -------------------------------------------------------------------- */
#if defined(__CC_ARM) //Keil
#define CODESECT AREA ||.text||, CODE, READONLY, ALIGN=2
#define LABEL
#elif defined(__IASMARM__) //IAR
#define CODESECT SECTION `.text`:CODE
#define PROC
#define LABEL
#define ENDP
#define EXPORT PUBLIC
#elif defined (__GNUC__) //GCC
.syntax unified
.cpu cortex-m4
.fpu softvfp
#define THUMB .thumb
#define CODESECT .section text
#define EXPORT .global
#define PROC :
#define LABEL :
#define ENDP
#define END
#endif
CODESECT
THUMB
;/*
;* @brief In-place bit reversal function.
;* @param[in, out] *pSrc points to the in-place buffer of unknown 32-bit data type.
;* @param[in] bitRevLen bit reversal table length
;* @param[in] *pBitRevTab points to bit reversal table.
;* @return none.
;*/
EXPORT arm_bitreversal_32
#if defined(ARM_MATH_CM0) || defined(ARM_MATH_CM0PLUS)
arm_bitreversal_32 PROC
ADDS r3,r1,#1
PUSH {r4-r6}
ADDS r1,r2,#0
LSRS r3,r3,#1
arm_bitreversal_32_0 LABEL
LDRH r2,[r1,#2]
LDRH r6,[r1,#0]
ADD r2,r0,r2
ADD r6,r0,r6
LDR r5,[r2,#0]
LDR r4,[r6,#0]
STR r5,[r6,#0]
STR r4,[r2,#0]
LDR r5,[r2,#4]
LDR r4,[r6,#4]
STR r5,[r6,#4]
STR r4,[r2,#4]
ADDS r1,r1,#4
SUBS r3,r3,#1
BNE arm_bitreversal_32_0
POP {r4-r6}
BX lr
ENDP
#else
arm_bitreversal_32 PROC
ADDS r3,r1,#1
CMP r3,#1
IT LS
BXLS lr
PUSH {r4-r9}
ADDS r1,r2,#2
LSRS r3,r3,#2
arm_bitreversal_32_0 LABEL ;/* loop unrolled by 2 */
LDRH r8,[r1,#4]
LDRH r9,[r1,#2]
LDRH r2,[r1,#0]
LDRH r12,[r1,#-2]
ADD r8,r0,r8
ADD r9,r0,r9
ADD r2,r0,r2
ADD r12,r0,r12
LDR r7,[r9,#0]
LDR r6,[r8,#0]
LDR r5,[r2,#0]
LDR r4,[r12,#0]
STR r6,[r9,#0]
STR r7,[r8,#0]
STR r5,[r12,#0]
STR r4,[r2,#0]
LDR r7,[r9,#4]
LDR r6,[r8,#4]
LDR r5,[r2,#4]
LDR r4,[r12,#4]
STR r6,[r9,#4]
STR r7,[r8,#4]
STR r5,[r12,#4]
STR r4,[r2,#4]
ADDS r1,r1,#8
SUBS r3,r3,#1
BNE arm_bitreversal_32_0
POP {r4-r9}
BX lr
ENDP
#endif
END

View File

@ -0,0 +1,616 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2013 ARM Limited. All rights reserved.
*
* $Date: 17. January 2013
* $Revision: V1.4.1
*
* Project: CMSIS DSP Library
* Title: arm_cfft_f32.c
*
* Description: Combined Radix Decimation in Frequency CFFT Floating point processing function
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
#include "arm_common_tables.h"
extern void arm_radix8_butterfly_f32(
float32_t * pSrc,
uint16_t fftLen,
const float32_t * pCoef,
uint16_t twidCoefModifier);
extern void arm_bitreversal_32(
uint32_t * pSrc,
const uint16_t bitRevLen,
const uint16_t * pBitRevTable);
/**
* @ingroup groupTransforms
*/
/**
* @defgroup ComplexFFT Complex FFT Functions
*
* \par
* The Fast Fourier Transform (FFT) is an efficient algorithm for computing the
* Discrete Fourier Transform (DFT). The FFT can be orders of magnitude faster
* than the DFT, especially for long lengths.
* The algorithms described in this section
* operate on complex data. A separate set of functions is devoted to handling
* of real sequences.
* \par
* There are separate algorithms for handling floating-point, Q15, and Q31 data
* types. The algorithms available for each data type are described next.
* \par
* The FFT functions operate in-place. That is, the array holding the input data
* will also be used to hold the corresponding result. The input data is complex
* and contains <code>2*fftLen</code> interleaved values as shown below.
* <pre> {real[0], imag[0], real[1], imag[1],..} </pre>
* The FFT result will be contained in the same array and the frequency domain
* values will have the same interleaving.
*
* \par Floating-point
* The floating-point complex FFT uses a mixed-radix algorithm. Multiple radix-8
* stages are performed along with a single radix-2 or radix-4 stage, as needed.
* The algorithm supports lengths of [16, 32, 64, ..., 4096] and each length uses
* a different twiddle factor table.
* \par
* The function uses the standard FFT definition and output values may grow by a
* factor of <code>fftLen</code> when computing the forward transform. The
* inverse transform includes a scale of <code>1/fftLen</code> as part of the
* calculation and this matches the textbook definition of the inverse FFT.
* \par
* Preinitialized data structures containing twiddle factors and bit reversal
* tables are provided and defined in <code>arm_const_structs.h</code>. Include
* this header in your function and then pass one of the constant structures as
* an argument to arm_cfft_f32. For example:
* \par
* <code>arm_cfft_f32(arm_cfft_sR_f32_len64, pSrc, 1, 1)</code>
* \par
* computes a 64-point inverse complex FFT including bit reversal.
* The data structures are treated as constant data and not modified during the
* calculation. The same data structure can be reused for multiple transforms
* including mixing forward and inverse transforms.
* \par
* Earlier releases of the library provided separate radix-2 and radix-4
* algorithms that operated on floating-point data. These functions are still
* provided but are deprecated. The older functions are slower and less general
* than the new functions.
* \par
* An example of initialization of the constants for the arm_cfft_f32 function follows:
* \par
* const static arm_cfft_instance_f32 *S;
* ...
* switch (length) {
* case 16:
* S = & arm_cfft_sR_f32_len16;
* break;
* case 32:
* S = & arm_cfft_sR_f32_len32;
* break;
* case 64:
* S = & arm_cfft_sR_f32_len64;
* break;
* case 128:
* S = & arm_cfft_sR_f32_len128;
* break;
* case 256:
* S = & arm_cfft_sR_f32_len256;
* break;
* case 512:
* S = & arm_cfft_sR_f32_len512;
* break;
* case 1024:
* S = & arm_cfft_sR_f32_len1024;
* break;
* case 2048:
* S = & arm_cfft_sR_f32_len2048;
* break;
* case 4096:
* S = & arm_cfft_sR_f32_len4096;
* break;
* }
* \par Q15 and Q31
* The library provides radix-2 and radix-4 FFT algorithms for fixed-point data. The
* radix-2 algorithm supports lengths of [16, 32, 64, ..., 4096]. The radix-4
* algorithm supports lengths of [16, 64, 256, ..., 4096]. When possible, you
* should use the radix-4 algorithm since it is faster than the radix-2 of the
* same length.
* \par
* The forward FFTs include scaling in order to prevent results from overflowing.
* Intermediate results are scaled down during each butterfly stage. In the
* radix-2 algorithm, a scale of 0.5 is applied during each butterfly. In the
* radix-4 algorithm, a scale of 0.25 is applied. The scaling applies to both
* the forward and the inverse FFTs. Thus the forward FFT contains an additional
* scale factor of <code>1/fftLen</code> as compared to the standard textbook
* definition of the FFT. The inverse FFT also scales down during each butterfly
* stage and this corresponds to the standard textbook definition.
* \par
* A separate instance structure must be defined for each transform used but
* twiddle factor and bit reversal tables can be reused.
* \par
* There is also an associated initialization function for each data type.
* The initialization function performs the following operations:
* - Sets the values of the internal structure fields.
* - Initializes twiddle factor table and bit reversal table pointers.
* \par
* Use of the initialization function is optional.
* However, if the initialization function is used, then the instance structure
* cannot be placed into a const data section. To place an instance structure
* into a const data section, the instance structure should be manually
* initialized as follows:
* <pre>
*arm_cfft_radix2_instance_q31 S = {fftLen, ifftFlag, bitReverseFlag, pTwiddle, pBitRevTable, twidCoefModifier, bitRevFactor};
*arm_cfft_radix2_instance_q15 S = {fftLen, ifftFlag, bitReverseFlag, pTwiddle, pBitRevTable, twidCoefModifier, bitRevFactor};
*arm_cfft_radix4_instance_q31 S = {fftLen, ifftFlag, bitReverseFlag, pTwiddle, pBitRevTable, twidCoefModifier, bitRevFactor};
*arm_cfft_radix4_instance_q15 S = {fftLen, ifftFlag, bitReverseFlag, pTwiddle, pBitRevTable, twidCoefModifier, bitRevFactor};
*arm_cfft_instance_f32 S = {fftLen, pTwiddle, pBitRevTable, bitRevLength};
* </pre>
* \par
* where <code>fftLen</code> length of CFFT/CIFFT; <code>ifftFlag</code> Flag for
* selection of forward or inverse transform. When ifftFlag is set the inverse
* transform is calculated.
* <code>bitReverseFlag</code> Flag for selection of output order (Set bitReverseFlag to output in normal order otherwise output in bit reversed order);
* <code>pTwiddle</code>points to array of twiddle coefficients; <code>pBitRevTable</code> points to the bit reversal table.
* <code>twidCoefModifier</code> modifier for twiddle factor table which supports all FFT lengths with same table;
* <code>pBitRevTable</code> modifier for bit reversal table which supports all FFT lengths with same table.
* <code>onebyfftLen</code> value of 1/fftLen to calculate CIFFT;
* \par
* The Q15 and Q31 FFT functions use a large bit reversal and twiddle factor
* table. The tables are defined for the maximum length transform and a subset
* of the coefficients are used in shorter transforms.
*
*/
void arm_cfft_radix8by2_f32( arm_cfft_instance_f32 * S, float32_t * p1)
{
uint32_t L = S->fftLen;
float32_t * pCol1, * pCol2, * pMid1, * pMid2;
float32_t * p2 = p1 + L;
const float32_t * tw = (float32_t *) S->pTwiddle;
float32_t t1[4], t2[4], t3[4], t4[4], twR, twI;
float32_t m0, m1, m2, m3;
uint32_t l;
pCol1 = p1;
pCol2 = p2;
// Define new length
L >>= 1;
// Initialize mid pointers
pMid1 = p1 + L;
pMid2 = p2 + L;
// do two dot Fourier transform
for ( l = L >> 2; l > 0; l-- )
{
t1[0] = p1[0];
t1[1] = p1[1];
t1[2] = p1[2];
t1[3] = p1[3];
t2[0] = p2[0];
t2[1] = p2[1];
t2[2] = p2[2];
t2[3] = p2[3];
t3[0] = pMid1[0];
t3[1] = pMid1[1];
t3[2] = pMid1[2];
t3[3] = pMid1[3];
t4[0] = pMid2[0];
t4[1] = pMid2[1];
t4[2] = pMid2[2];
t4[3] = pMid2[3];
*p1++ = t1[0] + t2[0];
*p1++ = t1[1] + t2[1];
*p1++ = t1[2] + t2[2];
*p1++ = t1[3] + t2[3]; // col 1
t2[0] = t1[0] - t2[0];
t2[1] = t1[1] - t2[1];
t2[2] = t1[2] - t2[2];
t2[3] = t1[3] - t2[3]; // for col 2
*pMid1++ = t3[0] + t4[0];
*pMid1++ = t3[1] + t4[1];
*pMid1++ = t3[2] + t4[2];
*pMid1++ = t3[3] + t4[3]; // col 1
t4[0] = t4[0] - t3[0];
t4[1] = t4[1] - t3[1];
t4[2] = t4[2] - t3[2];
t4[3] = t4[3] - t3[3]; // for col 2
twR = *tw++;
twI = *tw++;
// multiply by twiddle factors
m0 = t2[0] * twR;
m1 = t2[1] * twI;
m2 = t2[1] * twR;
m3 = t2[0] * twI;
// R = R * Tr - I * Ti
*p2++ = m0 + m1;
// I = I * Tr + R * Ti
*p2++ = m2 - m3;
// use vertical symmetry
// 0.9988 - 0.0491i <==> -0.0491 - 0.9988i
m0 = t4[0] * twI;
m1 = t4[1] * twR;
m2 = t4[1] * twI;
m3 = t4[0] * twR;
*pMid2++ = m0 - m1;
*pMid2++ = m2 + m3;
twR = *tw++;
twI = *tw++;
m0 = t2[2] * twR;
m1 = t2[3] * twI;
m2 = t2[3] * twR;
m3 = t2[2] * twI;
*p2++ = m0 + m1;
*p2++ = m2 - m3;
m0 = t4[2] * twI;
m1 = t4[3] * twR;
m2 = t4[3] * twI;
m3 = t4[2] * twR;
*pMid2++ = m0 - m1;
*pMid2++ = m2 + m3;
}
// first col
arm_radix8_butterfly_f32( pCol1, L, (float32_t *) S->pTwiddle, 2u);
// second col
arm_radix8_butterfly_f32( pCol2, L, (float32_t *) S->pTwiddle, 2u);
}
void arm_cfft_radix8by4_f32( arm_cfft_instance_f32 * S, float32_t * p1)
{
uint32_t L = S->fftLen >> 1;
float32_t * pCol1, *pCol2, *pCol3, *pCol4, *pEnd1, *pEnd2, *pEnd3, *pEnd4;
const float32_t *tw2, *tw3, *tw4;
float32_t * p2 = p1 + L;
float32_t * p3 = p2 + L;
float32_t * p4 = p3 + L;
float32_t t2[4], t3[4], t4[4], twR, twI;
float32_t p1ap3_0, p1sp3_0, p1ap3_1, p1sp3_1;
float32_t m0, m1, m2, m3;
uint32_t l, twMod2, twMod3, twMod4;
pCol1 = p1; // points to real values by default
pCol2 = p2;
pCol3 = p3;
pCol4 = p4;
pEnd1 = p2 - 1; // points to imaginary values by default
pEnd2 = p3 - 1;
pEnd3 = p4 - 1;
pEnd4 = pEnd3 + L;
tw2 = tw3 = tw4 = (float32_t *) S->pTwiddle;
L >>= 1;
// do four dot Fourier transform
twMod2 = 2;
twMod3 = 4;
twMod4 = 6;
// TOP
p1ap3_0 = p1[0] + p3[0];
p1sp3_0 = p1[0] - p3[0];
p1ap3_1 = p1[1] + p3[1];
p1sp3_1 = p1[1] - p3[1];
// col 2
t2[0] = p1sp3_0 + p2[1] - p4[1];
t2[1] = p1sp3_1 - p2[0] + p4[0];
// col 3
t3[0] = p1ap3_0 - p2[0] - p4[0];
t3[1] = p1ap3_1 - p2[1] - p4[1];
// col 4
t4[0] = p1sp3_0 - p2[1] + p4[1];
t4[1] = p1sp3_1 + p2[0] - p4[0];
// col 1
*p1++ = p1ap3_0 + p2[0] + p4[0];
*p1++ = p1ap3_1 + p2[1] + p4[1];
// Twiddle factors are ones
*p2++ = t2[0];
*p2++ = t2[1];
*p3++ = t3[0];
*p3++ = t3[1];
*p4++ = t4[0];
*p4++ = t4[1];
tw2 += twMod2;
tw3 += twMod3;
tw4 += twMod4;
for (l = (L - 2) >> 1; l > 0; l-- )
{
// TOP
p1ap3_0 = p1[0] + p3[0];
p1sp3_0 = p1[0] - p3[0];
p1ap3_1 = p1[1] + p3[1];
p1sp3_1 = p1[1] - p3[1];
// col 2
t2[0] = p1sp3_0 + p2[1] - p4[1];
t2[1] = p1sp3_1 - p2[0] + p4[0];
// col 3
t3[0] = p1ap3_0 - p2[0] - p4[0];
t3[1] = p1ap3_1 - p2[1] - p4[1];
// col 4
t4[0] = p1sp3_0 - p2[1] + p4[1];
t4[1] = p1sp3_1 + p2[0] - p4[0];
// col 1 - top
*p1++ = p1ap3_0 + p2[0] + p4[0];
*p1++ = p1ap3_1 + p2[1] + p4[1];
// BOTTOM
p1ap3_1 = pEnd1[-1] + pEnd3[-1];
p1sp3_1 = pEnd1[-1] - pEnd3[-1];
p1ap3_0 = pEnd1[0] + pEnd3[0];
p1sp3_0 = pEnd1[0] - pEnd3[0];
// col 2
t2[2] = pEnd2[0] - pEnd4[0] + p1sp3_1;
t2[3] = pEnd1[0] - pEnd3[0] - pEnd2[-1] + pEnd4[-1];
// col 3
t3[2] = p1ap3_1 - pEnd2[-1] - pEnd4[-1];
t3[3] = p1ap3_0 - pEnd2[0] - pEnd4[0];
// col 4
t4[2] = pEnd2[0] - pEnd4[0] - p1sp3_1;
t4[3] = pEnd4[-1] - pEnd2[-1] - p1sp3_0;
// col 1 - Bottom
*pEnd1-- = p1ap3_0 + pEnd2[0] + pEnd4[0];
*pEnd1-- = p1ap3_1 + pEnd2[-1] + pEnd4[-1];
// COL 2
// read twiddle factors
twR = *tw2++;
twI = *tw2++;
// multiply by twiddle factors
// let Z1 = a + i(b), Z2 = c + i(d)
// => Z1 * Z2 = (a*c - b*d) + i(b*c + a*d)
// Top
m0 = t2[0] * twR;
m1 = t2[1] * twI;
m2 = t2[1] * twR;
m3 = t2[0] * twI;
*p2++ = m0 + m1;
*p2++ = m2 - m3;
// use vertical symmetry col 2
// 0.9997 - 0.0245i <==> 0.0245 - 0.9997i
// Bottom
m0 = t2[3] * twI;
m1 = t2[2] * twR;
m2 = t2[2] * twI;
m3 = t2[3] * twR;
*pEnd2-- = m0 - m1;
*pEnd2-- = m2 + m3;
// COL 3
twR = tw3[0];
twI = tw3[1];
tw3 += twMod3;
// Top
m0 = t3[0] * twR;
m1 = t3[1] * twI;
m2 = t3[1] * twR;
m3 = t3[0] * twI;
*p3++ = m0 + m1;
*p3++ = m2 - m3;
// use vertical symmetry col 3
// 0.9988 - 0.0491i <==> -0.9988 - 0.0491i
// Bottom
m0 = -t3[3] * twR;
m1 = t3[2] * twI;
m2 = t3[2] * twR;
m3 = t3[3] * twI;
*pEnd3-- = m0 - m1;
*pEnd3-- = m3 - m2;
// COL 4
twR = tw4[0];
twI = tw4[1];
tw4 += twMod4;
// Top
m0 = t4[0] * twR;
m1 = t4[1] * twI;
m2 = t4[1] * twR;
m3 = t4[0] * twI;
*p4++ = m0 + m1;
*p4++ = m2 - m3;
// use vertical symmetry col 4
// 0.9973 - 0.0736i <==> -0.0736 + 0.9973i
// Bottom
m0 = t4[3] * twI;
m1 = t4[2] * twR;
m2 = t4[2] * twI;
m3 = t4[3] * twR;
*pEnd4-- = m0 - m1;
*pEnd4-- = m2 + m3;
}
//MIDDLE
// Twiddle factors are
// 1.0000 0.7071-0.7071i -1.0000i -0.7071-0.7071i
p1ap3_0 = p1[0] + p3[0];
p1sp3_0 = p1[0] - p3[0];
p1ap3_1 = p1[1] + p3[1];
p1sp3_1 = p1[1] - p3[1];
// col 2
t2[0] = p1sp3_0 + p2[1] - p4[1];
t2[1] = p1sp3_1 - p2[0] + p4[0];
// col 3
t3[0] = p1ap3_0 - p2[0] - p4[0];
t3[1] = p1ap3_1 - p2[1] - p4[1];
// col 4
t4[0] = p1sp3_0 - p2[1] + p4[1];
t4[1] = p1sp3_1 + p2[0] - p4[0];
// col 1 - Top
*p1++ = p1ap3_0 + p2[0] + p4[0];
*p1++ = p1ap3_1 + p2[1] + p4[1];
// COL 2
twR = tw2[0];
twI = tw2[1];
m0 = t2[0] * twR;
m1 = t2[1] * twI;
m2 = t2[1] * twR;
m3 = t2[0] * twI;
*p2++ = m0 + m1;
*p2++ = m2 - m3;
// COL 3
twR = tw3[0];
twI = tw3[1];
m0 = t3[0] * twR;
m1 = t3[1] * twI;
m2 = t3[1] * twR;
m3 = t3[0] * twI;
*p3++ = m0 + m1;
*p3++ = m2 - m3;
// COL 4
twR = tw4[0];
twI = tw4[1];
m0 = t4[0] * twR;
m1 = t4[1] * twI;
m2 = t4[1] * twR;
m3 = t4[0] * twI;
*p4++ = m0 + m1;
*p4++ = m2 - m3;
// first col
arm_radix8_butterfly_f32( pCol1, L, (float32_t *) S->pTwiddle, 4u);
// second col
arm_radix8_butterfly_f32( pCol2, L, (float32_t *) S->pTwiddle, 4u);
// third col
arm_radix8_butterfly_f32( pCol3, L, (float32_t *) S->pTwiddle, 4u);
// fourth col
arm_radix8_butterfly_f32( pCol4, L, (float32_t *) S->pTwiddle, 4u);
}
/**
* @addtogroup ComplexFFT
* @{
*/
/**
* @details
* @brief Processing function for the floating-point complex FFT.
* @param[in] *S points to an instance of the floating-point CFFT structure.
* @param[in, out] *p1 points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
* @param[in] ifftFlag flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
* @param[in] bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
* @return none.
*/
void arm_cfft_f32(
const arm_cfft_instance_f32 * S,
float32_t * p1,
uint8_t ifftFlag,
uint8_t bitReverseFlag)
{
uint32_t L = S->fftLen, l;
float32_t invL, * pSrc;
if(ifftFlag == 1u)
{
/* Conjugate input data */
pSrc = p1 + 1;
for(l=0; l<L; l++) {
*pSrc = -*pSrc;
pSrc += 2;
}
}
switch (L) {
case 16:
case 128:
case 1024:
arm_cfft_radix8by2_f32 ( (arm_cfft_instance_f32 *) S, p1);
break;
case 32:
case 256:
case 2048:
arm_cfft_radix8by4_f32 ( (arm_cfft_instance_f32 *) S, p1);
break;
case 64:
case 512:
case 4096:
arm_radix8_butterfly_f32( p1, L, (float32_t *) S->pTwiddle, 1);
break;
}
if( bitReverseFlag )
arm_bitreversal_32((uint32_t*)p1,S->bitRevLength,S->pBitRevTable);
if(ifftFlag == 1u)
{
invL = 1.0f/(float32_t)L;
/* Conjugate and scale output data */
pSrc = p1;
for(l=0; l<L; l++) {
*pSrc++ *= invL ;
*pSrc = -(*pSrc) * invL;
pSrc++;
}
}
}

View File

@ -0,0 +1,485 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2013 ARM Limited. All rights reserved.
*
* $Date: 17. January 2013
* $Revision: V1.4.1
*
* Project: CMSIS DSP Library
* Title: arm_cfft_radix2_f32.c
*
* Description: Radix-2 Decimation in Frequency CFFT & CIFFT Floating point processing function
*
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
void arm_radix2_butterfly_f32(
float32_t * pSrc,
uint32_t fftLen,
float32_t * pCoef,
uint16_t twidCoefModifier);
void arm_radix2_butterfly_inverse_f32(
float32_t * pSrc,
uint32_t fftLen,
float32_t * pCoef,
uint16_t twidCoefModifier,
float32_t onebyfftLen);
extern void arm_bitreversal_f32(
float32_t * pSrc,
uint16_t fftSize,
uint16_t bitRevFactor,
uint16_t * pBitRevTab);
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
* @details
* @brief Radix-2 CFFT/CIFFT.
* @deprecated Do not use this function. It has been superceded by \ref arm_cfft_f32 and will be removed
* in the future.
* @param[in] *S points to an instance of the floating-point Radix-2 CFFT/CIFFT structure.
* @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
* @return none.
*/
void arm_cfft_radix2_f32(
const arm_cfft_radix2_instance_f32 * S,
float32_t * pSrc)
{
if(S->ifftFlag == 1u)
{
/* Complex IFFT radix-2 */
arm_radix2_butterfly_inverse_f32(pSrc, S->fftLen, S->pTwiddle,
S->twidCoefModifier, S->onebyfftLen);
}
else
{
/* Complex FFT radix-2 */
arm_radix2_butterfly_f32(pSrc, S->fftLen, S->pTwiddle,
S->twidCoefModifier);
}
if(S->bitReverseFlag == 1u)
{
/* Bit Reversal */
arm_bitreversal_f32(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
}
}
/**
* @} end of ComplexFFT group
*/
/* ----------------------------------------------------------------------
** Internal helper function used by the FFTs
** ------------------------------------------------------------------- */
/*
* @brief Core function for the floating-point CFFT butterfly process.
* @param[in, out] *pSrc points to the in-place buffer of floating-point data type.
* @param[in] fftLen length of the FFT.
* @param[in] *pCoef points to the twiddle coefficient buffer.
* @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
* @return none.
*/
void arm_radix2_butterfly_f32(
float32_t * pSrc,
uint32_t fftLen,
float32_t * pCoef,
uint16_t twidCoefModifier)
{
uint32_t i, j, k, l;
uint32_t n1, n2, ia;
float32_t xt, yt, cosVal, sinVal;
float32_t p0, p1, p2, p3;
float32_t a0, a1;
#ifndef ARM_MATH_CM0_FAMILY
/* Initializations for the first stage */
n2 = fftLen >> 1;
ia = 0;
i = 0;
// loop for groups
for (k = n2; k > 0; k--)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
/* Twiddle coefficients index modifier */
ia += twidCoefModifier;
/* index calculation for the input as, */
/* pSrc[i + 0], pSrc[i + fftLen/1] */
l = i + n2;
/* Butterfly implementation */
a0 = pSrc[2 * i] + pSrc[2 * l];
xt = pSrc[2 * i] - pSrc[2 * l];
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
p0 = xt * cosVal;
p1 = yt * sinVal;
p2 = yt * cosVal;
p3 = xt * sinVal;
pSrc[2 * i] = a0;
pSrc[2 * i + 1] = a1;
pSrc[2 * l] = p0 + p1;
pSrc[2 * l + 1] = p2 - p3;
i++;
} // groups loop end
twidCoefModifier <<= 1u;
// loop for stage
for (k = n2; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
j = 0;
do
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia += twidCoefModifier;
// loop for butterfly
i = j;
do
{
l = i + n2;
a0 = pSrc[2 * i] + pSrc[2 * l];
xt = pSrc[2 * i] - pSrc[2 * l];
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
p0 = xt * cosVal;
p1 = yt * sinVal;
p2 = yt * cosVal;
p3 = xt * sinVal;
pSrc[2 * i] = a0;
pSrc[2 * i + 1] = a1;
pSrc[2 * l] = p0 + p1;
pSrc[2 * l + 1] = p2 - p3;
i += n1;
} while( i < fftLen ); // butterfly loop end
j++;
} while( j < n2); // groups loop end
twidCoefModifier <<= 1u;
} // stages loop end
// loop for butterfly
for (i = 0; i < fftLen; i += 2)
{
a0 = pSrc[2 * i] + pSrc[2 * i + 2];
xt = pSrc[2 * i] - pSrc[2 * i + 2];
yt = pSrc[2 * i + 1] - pSrc[2 * i + 3];
a1 = pSrc[2 * i + 3] + pSrc[2 * i + 1];
pSrc[2 * i] = a0;
pSrc[2 * i + 1] = a1;
pSrc[2 * i + 2] = xt;
pSrc[2 * i + 3] = yt;
} // groups loop end
#else
n2 = fftLen;
// loop for stage
for (k = fftLen; k > 1; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
j = 0;
do
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia += twidCoefModifier;
// loop for butterfly
i = j;
do
{
l = i + n2;
a0 = pSrc[2 * i] + pSrc[2 * l];
xt = pSrc[2 * i] - pSrc[2 * l];
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
p0 = xt * cosVal;
p1 = yt * sinVal;
p2 = yt * cosVal;
p3 = xt * sinVal;
pSrc[2 * i] = a0;
pSrc[2 * i + 1] = a1;
pSrc[2 * l] = p0 + p1;
pSrc[2 * l + 1] = p2 - p3;
i += n1;
} while(i < fftLen);
j++;
} while(j < n2);
twidCoefModifier <<= 1u;
}
#endif // #ifndef ARM_MATH_CM0_FAMILY
}
void arm_radix2_butterfly_inverse_f32(
float32_t * pSrc,
uint32_t fftLen,
float32_t * pCoef,
uint16_t twidCoefModifier,
float32_t onebyfftLen)
{
uint32_t i, j, k, l;
uint32_t n1, n2, ia;
float32_t xt, yt, cosVal, sinVal;
float32_t p0, p1, p2, p3;
float32_t a0, a1;
#ifndef ARM_MATH_CM0_FAMILY
n2 = fftLen >> 1;
ia = 0;
// loop for groups
for (i = 0; i < n2; i++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia += twidCoefModifier;
l = i + n2;
a0 = pSrc[2 * i] + pSrc[2 * l];
xt = pSrc[2 * i] - pSrc[2 * l];
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
p0 = xt * cosVal;
p1 = yt * sinVal;
p2 = yt * cosVal;
p3 = xt * sinVal;
pSrc[2 * i] = a0;
pSrc[2 * i + 1] = a1;
pSrc[2 * l] = p0 - p1;
pSrc[2 * l + 1] = p2 + p3;
} // groups loop end
twidCoefModifier <<= 1u;
// loop for stage
for (k = fftLen / 2; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
j = 0;
do
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia += twidCoefModifier;
// loop for butterfly
i = j;
do
{
l = i + n2;
a0 = pSrc[2 * i] + pSrc[2 * l];
xt = pSrc[2 * i] - pSrc[2 * l];
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
p0 = xt * cosVal;
p1 = yt * sinVal;
p2 = yt * cosVal;
p3 = xt * sinVal;
pSrc[2 * i] = a0;
pSrc[2 * i + 1] = a1;
pSrc[2 * l] = p0 - p1;
pSrc[2 * l + 1] = p2 + p3;
i += n1;
} while( i < fftLen ); // butterfly loop end
j++;
} while(j < n2); // groups loop end
twidCoefModifier <<= 1u;
} // stages loop end
// loop for butterfly
for (i = 0; i < fftLen; i += 2)
{
a0 = pSrc[2 * i] + pSrc[2 * i + 2];
xt = pSrc[2 * i] - pSrc[2 * i + 2];
a1 = pSrc[2 * i + 3] + pSrc[2 * i + 1];
yt = pSrc[2 * i + 1] - pSrc[2 * i + 3];
p0 = a0 * onebyfftLen;
p2 = xt * onebyfftLen;
p1 = a1 * onebyfftLen;
p3 = yt * onebyfftLen;
pSrc[2 * i] = p0;
pSrc[2 * i + 1] = p1;
pSrc[2 * i + 2] = p2;
pSrc[2 * i + 3] = p3;
} // butterfly loop end
#else
n2 = fftLen;
// loop for stage
for (k = fftLen; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
j = 0;
do
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
i = j;
do
{
l = i + n2;
a0 = pSrc[2 * i] + pSrc[2 * l];
xt = pSrc[2 * i] - pSrc[2 * l];
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
p0 = xt * cosVal;
p1 = yt * sinVal;
p2 = yt * cosVal;
p3 = xt * sinVal;
pSrc[2 * i] = a0;
pSrc[2 * i + 1] = a1;
pSrc[2 * l] = p0 - p1;
pSrc[2 * l + 1] = p2 + p3;
i += n1;
} while( i < fftLen ); // butterfly loop end
j++;
} while( j < n2 ); // groups loop end
twidCoefModifier = twidCoefModifier << 1u;
} // stages loop end
n1 = n2;
n2 = n2 >> 1;
// loop for butterfly
for (i = 0; i < fftLen; i += n1)
{
l = i + n2;
a0 = pSrc[2 * i] + pSrc[2 * l];
xt = pSrc[2 * i] - pSrc[2 * l];
a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
p0 = a0 * onebyfftLen;
p2 = xt * onebyfftLen;
p1 = a1 * onebyfftLen;
p3 = yt * onebyfftLen;
pSrc[2 * i] = p0;
pSrc[2u * l] = p2;
pSrc[2 * i + 1] = p1;
pSrc[2u * l + 1u] = p3;
} // butterfly loop end
#endif // #ifndef ARM_MATH_CM0_FAMILY
}

View File

@ -0,0 +1,205 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2013 ARM Limited. All rights reserved.
*
* $Date: 17. January 2013
* $Revision: V1.4.1
*
* Project: CMSIS DSP Library
* Title: arm_cfft_radix4_init_f32.c
*
* Description: Radix-4 Decimation in Frequency Floating-point CFFT & CIFFT Initialization function
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
#include "arm_common_tables.h"
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
* @brief Initialization function for the floating-point CFFT/CIFFT.
* @deprecated Do not use this function. It has been superceded by \ref arm_cfft_f32 and will be removed
* in the future.
* @param[in,out] *S points to an instance of the floating-point CFFT/CIFFT structure.
* @param[in] fftLen length of the FFT.
* @param[in] ifftFlag flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
* @param[in] bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
* @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
*
* \par Description:
* \par
* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
* Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
* \par
* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
* \par
* The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
* \par
* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
*/
arm_status arm_cfft_radix2_init_f32(
arm_cfft_radix2_instance_f32 * S,
uint16_t fftLen,
uint8_t ifftFlag,
uint8_t bitReverseFlag)
{
/* Initialise the default arm status */
arm_status status = ARM_MATH_SUCCESS;
/* Initialise the FFT length */
S->fftLen = fftLen;
/* Initialise the Twiddle coefficient pointer */
S->pTwiddle = (float32_t *) twiddleCoef;
/* Initialise the Flag for selection of CFFT or CIFFT */
S->ifftFlag = ifftFlag;
/* Initialise the Flag for calculation Bit reversal or not */
S->bitReverseFlag = bitReverseFlag;
/* Initializations of structure parameters depending on the FFT length */
switch (S->fftLen)
{
case 4096u:
/* Initializations of structure parameters for 4096 point FFT */
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 1u;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 1u;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) armBitRevTable;
/* Initialise the 1/fftLen Value */
S->onebyfftLen = 0.000244140625;
break;
case 2048u:
/* Initializations of structure parameters for 2048 point FFT */
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 2u;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 2u;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) & armBitRevTable[1];
/* Initialise the 1/fftLen Value */
S->onebyfftLen = 0.00048828125;
break;
case 1024u:
/* Initializations of structure parameters for 1024 point FFT */
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 4u;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 4u;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
/* Initialise the 1/fftLen Value */
S->onebyfftLen = 0.0009765625f;
break;
case 512u:
/* Initializations of structure parameters for 512 point FFT */
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 8u;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 8u;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) & armBitRevTable[7];
/* Initialise the 1/fftLen Value */
S->onebyfftLen = 0.001953125;
break;
case 256u:
/* Initializations of structure parameters for 256 point FFT */
S->twidCoefModifier = 16u;
S->bitRevFactor = 16u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
S->onebyfftLen = 0.00390625f;
break;
case 128u:
/* Initializations of structure parameters for 128 point FFT */
S->twidCoefModifier = 32u;
S->bitRevFactor = 32u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[31];
S->onebyfftLen = 0.0078125;
break;
case 64u:
/* Initializations of structure parameters for 64 point FFT */
S->twidCoefModifier = 64u;
S->bitRevFactor = 64u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
S->onebyfftLen = 0.015625f;
break;
case 32u:
/* Initializations of structure parameters for 64 point FFT */
S->twidCoefModifier = 128u;
S->bitRevFactor = 128u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[127];
S->onebyfftLen = 0.03125;
break;
case 16u:
/* Initializations of structure parameters for 16 point FFT */
S->twidCoefModifier = 256u;
S->bitRevFactor = 256u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
S->onebyfftLen = 0.0625f;
break;
default:
/* Reporting argument error if fftSize is not valid value */
status = ARM_MATH_ARGUMENT_ERROR;
break;
}
return (status);
}
/**
* @} end of ComplexFFT group
*/

View File

@ -0,0 +1,188 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2013 ARM Limited. All rights reserved.
*
* $Date: 17. January 2013
* $Revision: V1.4.1
*
* Project: CMSIS DSP Library
* Title: arm_cfft_radix2_init_q15.c
*
* Description: Radix-2 Decimation in Frequency Q15 FFT & IFFT initialization function
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
#include "arm_common_tables.h"
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
* @brief Initialization function for the Q15 CFFT/CIFFT.
* @param[in,out] *S points to an instance of the Q15 CFFT/CIFFT structure.
* @param[in] fftLen length of the FFT.
* @param[in] ifftFlag flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
* @param[in] bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
* @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
*
* \par Description:
* \par
* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
* Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
* \par
* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
* \par
* The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
* \par
* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
*/
arm_status arm_cfft_radix2_init_q15(
arm_cfft_radix2_instance_q15 * S,
uint16_t fftLen,
uint8_t ifftFlag,
uint8_t bitReverseFlag)
{
/* Initialise the default arm status */
arm_status status = ARM_MATH_SUCCESS;
/* Initialise the FFT length */
S->fftLen = fftLen;
/* Initialise the Twiddle coefficient pointer */
S->pTwiddle = (q15_t *) twiddleCoefQ15;
/* Initialise the Flag for selection of CFFT or CIFFT */
S->ifftFlag = ifftFlag;
/* Initialise the Flag for calculation Bit reversal or not */
S->bitReverseFlag = bitReverseFlag;
/* Initializations of structure parameters depending on the FFT length */
switch (S->fftLen)
{
case 4096u:
/* Initializations of structure parameters for 4096 point FFT */
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 1u;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 1u;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) armBitRevTable;
break;
case 2048u:
/* Initializations of structure parameters for 2048 point FFT */
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 2u;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 2u;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) & armBitRevTable[1];
break;
case 1024u:
/* Initializations of structure parameters for 1024 point FFT */
S->twidCoefModifier = 4u;
S->bitRevFactor = 4u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
break;
case 512u:
/* Initializations of structure parameters for 512 point FFT */
S->twidCoefModifier = 8u;
S->bitRevFactor = 8u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[7];
break;
case 256u:
/* Initializations of structure parameters for 256 point FFT */
S->twidCoefModifier = 16u;
S->bitRevFactor = 16u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
break;
case 128u:
/* Initializations of structure parameters for 128 point FFT */
S->twidCoefModifier = 32u;
S->bitRevFactor = 32u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[31];
break;
case 64u:
/* Initializations of structure parameters for 64 point FFT */
S->twidCoefModifier = 64u;
S->bitRevFactor = 64u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
break;
case 32u:
/* Initializations of structure parameters for 32 point FFT */
S->twidCoefModifier = 128u;
S->bitRevFactor = 128u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[127];
break;
case 16u:
/* Initializations of structure parameters for 16 point FFT */
S->twidCoefModifier = 256u;
S->bitRevFactor = 256u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
break;
default:
/* Reporting argument error if fftSize is not valid value */
status = ARM_MATH_ARGUMENT_ERROR;
break;
}
return (status);
}
/**
* @} end of ComplexFFT group
*/

View File

@ -0,0 +1,186 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2013 ARM Limited. All rights reserved.
*
* $Date: 17. January 2013
* $Revision: V1.4.1
*
* Project: CMSIS DSP Library
* Title: arm_cfft_radix2_init_q31.c
*
* Description: Radix-2 Decimation in Frequency Fixed-point CFFT & CIFFT Initialization function
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
#include "arm_common_tables.h"
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
*
* @brief Initialization function for the Q31 CFFT/CIFFT.
* @param[in,out] *S points to an instance of the Q31 CFFT/CIFFT structure.
* @param[in] fftLen length of the FFT.
* @param[in] ifftFlag flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
* @param[in] bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
* @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
*
* \par Description:
* \par
* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
* Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
* \par
* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
* \par
* The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
* \par
* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
*/
arm_status arm_cfft_radix2_init_q31(
arm_cfft_radix2_instance_q31 * S,
uint16_t fftLen,
uint8_t ifftFlag,
uint8_t bitReverseFlag)
{
/* Initialise the default arm status */
arm_status status = ARM_MATH_SUCCESS;
/* Initialise the FFT length */
S->fftLen = fftLen;
/* Initialise the Twiddle coefficient pointer */
S->pTwiddle = (q31_t *) twiddleCoefQ31;
/* Initialise the Flag for selection of CFFT or CIFFT */
S->ifftFlag = ifftFlag;
/* Initialise the Flag for calculation Bit reversal or not */
S->bitReverseFlag = bitReverseFlag;
/* Initializations of Instance structure depending on the FFT length */
switch (S->fftLen)
{
/* Initializations of structure parameters for 4096 point FFT */
case 4096u:
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 1u;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 1u;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) armBitRevTable;
break;
/* Initializations of structure parameters for 2048 point FFT */
case 2048u:
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 2u;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 2u;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) & armBitRevTable[1];
break;
/* Initializations of structure parameters for 1024 point FFT */
case 1024u:
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 4u;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 4u;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
break;
/* Initializations of structure parameters for 512 point FFT */
case 512u:
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 8u;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 8u;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) & armBitRevTable[7];
break;
case 256u:
/* Initializations of structure parameters for 256 point FFT */
S->twidCoefModifier = 16u;
S->bitRevFactor = 16u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
break;
case 128u:
/* Initializations of structure parameters for 128 point FFT */
S->twidCoefModifier = 32u;
S->bitRevFactor = 32u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[31];
break;
case 64u:
/* Initializations of structure parameters for 64 point FFT */
S->twidCoefModifier = 64u;
S->bitRevFactor = 64u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
break;
case 32u:
/* Initializations of structure parameters for 32 point FFT */
S->twidCoefModifier = 128u;
S->bitRevFactor = 128u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[127];
break;
case 16u:
/* Initializations of structure parameters for 16 point FFT */
S->twidCoefModifier = 256u;
S->bitRevFactor = 256u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
break;
default:
/* Reporting argument error if fftSize is not valid value */
status = ARM_MATH_ARGUMENT_ERROR;
break;
}
return (status);
}
/**
* @} end of ComplexFFT group
*/

View File

@ -0,0 +1,741 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2013 ARM Limited. All rights reserved.
*
* $Date: 17. January 2013
* $Revision: V1.4.1
*
* Project: CMSIS DSP Library
* Title: arm_cfft_radix2_q15.c
*
* Description: Radix-2 Decimation in Frequency CFFT & CIFFT Fixed point processing function
*
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
void arm_radix2_butterfly_q15(
q15_t * pSrc,
uint32_t fftLen,
q15_t * pCoef,
uint16_t twidCoefModifier);
void arm_radix2_butterfly_inverse_q15(
q15_t * pSrc,
uint32_t fftLen,
q15_t * pCoef,
uint16_t twidCoefModifier);
void arm_bitreversal_q15(
q15_t * pSrc,
uint32_t fftLen,
uint16_t bitRevFactor,
uint16_t * pBitRevTab);
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
* @details
* @brief Processing function for the fixed-point CFFT/CIFFT.
* @param[in] *S points to an instance of the fixed-point CFFT/CIFFT structure.
* @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
* @return none.
*/
void arm_cfft_radix2_q15(
const arm_cfft_radix2_instance_q15 * S,
q15_t * pSrc)
{
if(S->ifftFlag == 1u)
{
arm_radix2_butterfly_inverse_q15(pSrc, S->fftLen,
S->pTwiddle, S->twidCoefModifier);
}
else
{
arm_radix2_butterfly_q15(pSrc, S->fftLen,
S->pTwiddle, S->twidCoefModifier);
}
arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
}
/**
* @} end of ComplexFFT group
*/
void arm_radix2_butterfly_q15(
q15_t * pSrc,
uint32_t fftLen,
q15_t * pCoef,
uint16_t twidCoefModifier)
{
#ifndef ARM_MATH_CM0_FAMILY
unsigned i, j, k, l;
unsigned n1, n2, ia;
q15_t in;
q31_t T, S, R;
q31_t coeff, out1, out2;
//N = fftLen;
n2 = fftLen;
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (i = 0; i < n2; i++)
{
coeff = _SIMD32_OFFSET(pCoef + (ia * 2u));
ia = ia + twidCoefModifier;
l = i + n2;
T = _SIMD32_OFFSET(pSrc + (2 * i));
in = ((int16_t) (T & 0xFFFF)) >> 2;
T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
S = _SIMD32_OFFSET(pSrc + (2 * l));
in = ((int16_t) (S & 0xFFFF)) >> 2;
S = ((S >> 2) & 0xFFFF0000) | (in & 0xFFFF);
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUAD(coeff, R) >> 16;
out2 = __SMUSDX(coeff, R);
#else
out1 = __SMUSDX(R, coeff) >> 16u;
out2 = __SMUAD(coeff, R);
#endif // #ifndef ARM_MATH_BIG_ENDIAN
_SIMD32_OFFSET(pSrc + (2u * l)) =
(q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
coeff = _SIMD32_OFFSET(pCoef + (ia * 2u));
ia = ia + twidCoefModifier;
// loop for butterfly
i++;
l++;
T = _SIMD32_OFFSET(pSrc + (2 * i));
in = ((int16_t) (T & 0xFFFF)) >> 2;
T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
S = _SIMD32_OFFSET(pSrc + (2 * l));
in = ((int16_t) (S & 0xFFFF)) >> 2;
S = ((S >> 2) & 0xFFFF0000) | (in & 0xFFFF);
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUAD(coeff, R) >> 16;
out2 = __SMUSDX(coeff, R);
#else
out1 = __SMUSDX(R, coeff) >> 16u;
out2 = __SMUAD(coeff, R);
#endif // #ifndef ARM_MATH_BIG_ENDIAN
_SIMD32_OFFSET(pSrc + (2u * l)) =
(q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
} // groups loop end
twidCoefModifier = twidCoefModifier << 1u;
// loop for stage
for (k = fftLen / 2; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (j = 0; j < n2; j++)
{
coeff = _SIMD32_OFFSET(pCoef + (ia * 2u));
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
T = _SIMD32_OFFSET(pSrc + (2 * i));
S = _SIMD32_OFFSET(pSrc + (2 * l));
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUAD(coeff, R) >> 16;
out2 = __SMUSDX(coeff, R);
#else
out1 = __SMUSDX(R, coeff) >> 16u;
out2 = __SMUAD(coeff, R);
#endif // #ifndef ARM_MATH_BIG_ENDIAN
_SIMD32_OFFSET(pSrc + (2u * l)) =
(q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
i += n1;
l = i + n2;
T = _SIMD32_OFFSET(pSrc + (2 * i));
S = _SIMD32_OFFSET(pSrc + (2 * l));
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUAD(coeff, R) >> 16;
out2 = __SMUSDX(coeff, R);
#else
out1 = __SMUSDX(R, coeff) >> 16u;
out2 = __SMUAD(coeff, R);
#endif // #ifndef ARM_MATH_BIG_ENDIAN
_SIMD32_OFFSET(pSrc + (2u * l)) =
(q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
} // butterfly loop end
} // groups loop end
twidCoefModifier = twidCoefModifier << 1u;
} // stages loop end
n1 = n2;
n2 = n2 >> 1;
ia = 0;
coeff = _SIMD32_OFFSET(pCoef + (ia * 2u));
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = 0; i < fftLen; i += n1)
{
l = i + n2;
T = _SIMD32_OFFSET(pSrc + (2 * i));
S = _SIMD32_OFFSET(pSrc + (2 * l));
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __QADD16(T, S);
_SIMD32_OFFSET(pSrc + (2u * l)) = R;
i += n1;
l = i + n2;
T = _SIMD32_OFFSET(pSrc + (2 * i));
S = _SIMD32_OFFSET(pSrc + (2 * l));
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __QADD16(T, S);
_SIMD32_OFFSET(pSrc + (2u * l)) = R;
} // groups loop end
#else
unsigned i, j, k, l;
unsigned n1, n2, ia;
q15_t xt, yt, cosVal, sinVal;
//N = fftLen;
n2 = fftLen;
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (j = 0; j < n2; j++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
xt = (pSrc[2 * i] >> 2u) - (pSrc[2 * l] >> 2u);
pSrc[2 * i] = ((pSrc[2 * i] >> 2u) + (pSrc[2 * l] >> 2u)) >> 1u;
yt = (pSrc[2 * i + 1] >> 2u) - (pSrc[2 * l + 1] >> 2u);
pSrc[2 * i + 1] =
((pSrc[2 * l + 1] >> 2u) + (pSrc[2 * i + 1] >> 2u)) >> 1u;
pSrc[2u * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) +
((int16_t) (((q31_t) yt * sinVal) >> 16)));
pSrc[2u * l + 1u] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) -
((int16_t) (((q31_t) xt * sinVal) >> 16)));
} // butterfly loop end
} // groups loop end
twidCoefModifier = twidCoefModifier << 1u;
// loop for stage
for (k = fftLen / 2; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (j = 0; j < n2; j++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) >> 1u;
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1u;
pSrc[2u * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) +
((int16_t) (((q31_t) yt * sinVal) >> 16)));
pSrc[2u * l + 1u] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) -
((int16_t) (((q31_t) xt * sinVal) >> 16)));
} // butterfly loop end
} // groups loop end
twidCoefModifier = twidCoefModifier << 1u;
} // stages loop end
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (j = 0; j < n2; j++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
pSrc[2u * l] = xt;
pSrc[2u * l + 1u] = yt;
} // butterfly loop end
} // groups loop end
twidCoefModifier = twidCoefModifier << 1u;
#endif // #ifndef ARM_MATH_CM0_FAMILY
}
void arm_radix2_butterfly_inverse_q15(
q15_t * pSrc,
uint32_t fftLen,
q15_t * pCoef,
uint16_t twidCoefModifier)
{
#ifndef ARM_MATH_CM0_FAMILY
unsigned i, j, k, l;
unsigned n1, n2, ia;
q15_t in;
q31_t T, S, R;
q31_t coeff, out1, out2;
//N = fftLen;
n2 = fftLen;
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (i = 0; i < n2; i++)
{
coeff = _SIMD32_OFFSET(pCoef + (ia * 2u));
ia = ia + twidCoefModifier;
l = i + n2;
T = _SIMD32_OFFSET(pSrc + (2 * i));
in = ((int16_t) (T & 0xFFFF)) >> 2;
T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
S = _SIMD32_OFFSET(pSrc + (2 * l));
in = ((int16_t) (S & 0xFFFF)) >> 2;
S = ((S >> 2) & 0xFFFF0000) | (in & 0xFFFF);
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUSD(coeff, R) >> 16;
out2 = __SMUADX(coeff, R);
#else
out1 = __SMUADX(R, coeff) >> 16u;
out2 = __SMUSD(__QSUB(0, coeff), R);
#endif // #ifndef ARM_MATH_BIG_ENDIAN
_SIMD32_OFFSET(pSrc + (2u * l)) =
(q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
coeff = _SIMD32_OFFSET(pCoef + (ia * 2u));
ia = ia + twidCoefModifier;
// loop for butterfly
i++;
l++;
T = _SIMD32_OFFSET(pSrc + (2 * i));
in = ((int16_t) (T & 0xFFFF)) >> 2;
T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
S = _SIMD32_OFFSET(pSrc + (2 * l));
in = ((int16_t) (S & 0xFFFF)) >> 2;
S = ((S >> 2) & 0xFFFF0000) | (in & 0xFFFF);
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUSD(coeff, R) >> 16;
out2 = __SMUADX(coeff, R);
#else
out1 = __SMUADX(R, coeff) >> 16u;
out2 = __SMUSD(__QSUB(0, coeff), R);
#endif // #ifndef ARM_MATH_BIG_ENDIAN
_SIMD32_OFFSET(pSrc + (2u * l)) =
(q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
} // groups loop end
twidCoefModifier = twidCoefModifier << 1u;
// loop for stage
for (k = fftLen / 2; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (j = 0; j < n2; j++)
{
coeff = _SIMD32_OFFSET(pCoef + (ia * 2u));
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
T = _SIMD32_OFFSET(pSrc + (2 * i));
S = _SIMD32_OFFSET(pSrc + (2 * l));
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUSD(coeff, R) >> 16;
out2 = __SMUADX(coeff, R);
#else
out1 = __SMUADX(R, coeff) >> 16u;
out2 = __SMUSD(__QSUB(0, coeff), R);
#endif // #ifndef ARM_MATH_BIG_ENDIAN
_SIMD32_OFFSET(pSrc + (2u * l)) =
(q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
i += n1;
l = i + n2;
T = _SIMD32_OFFSET(pSrc + (2 * i));
S = _SIMD32_OFFSET(pSrc + (2 * l));
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUSD(coeff, R) >> 16;
out2 = __SMUADX(coeff, R);
#else
out1 = __SMUADX(R, coeff) >> 16u;
out2 = __SMUSD(__QSUB(0, coeff), R);
#endif // #ifndef ARM_MATH_BIG_ENDIAN
_SIMD32_OFFSET(pSrc + (2u * l)) =
(q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
} // butterfly loop end
} // groups loop end
twidCoefModifier = twidCoefModifier << 1u;
} // stages loop end
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (j = 0; j < n2; j++)
{
coeff = _SIMD32_OFFSET(pCoef + (ia * 2u));
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
T = _SIMD32_OFFSET(pSrc + (2 * i));
S = _SIMD32_OFFSET(pSrc + (2 * l));
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __QADD16(T, S);
_SIMD32_OFFSET(pSrc + (2u * l)) = R;
} // butterfly loop end
} // groups loop end
twidCoefModifier = twidCoefModifier << 1u;
#else
unsigned i, j, k, l;
unsigned n1, n2, ia;
q15_t xt, yt, cosVal, sinVal;
//N = fftLen;
n2 = fftLen;
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (j = 0; j < n2; j++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
xt = (pSrc[2 * i] >> 2u) - (pSrc[2 * l] >> 2u);
pSrc[2 * i] = ((pSrc[2 * i] >> 2u) + (pSrc[2 * l] >> 2u)) >> 1u;
yt = (pSrc[2 * i + 1] >> 2u) - (pSrc[2 * l + 1] >> 2u);
pSrc[2 * i + 1] =
((pSrc[2 * l + 1] >> 2u) + (pSrc[2 * i + 1] >> 2u)) >> 1u;
pSrc[2u * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) -
((int16_t) (((q31_t) yt * sinVal) >> 16)));
pSrc[2u * l + 1u] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) +
((int16_t) (((q31_t) xt * sinVal) >> 16)));
} // butterfly loop end
} // groups loop end
twidCoefModifier = twidCoefModifier << 1u;
// loop for stage
for (k = fftLen / 2; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (j = 0; j < n2; j++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) >> 1u;
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1u;
pSrc[2u * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) -
((int16_t) (((q31_t) yt * sinVal) >> 16)));
pSrc[2u * l + 1u] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) +
((int16_t) (((q31_t) xt * sinVal) >> 16)));
} // butterfly loop end
} // groups loop end
twidCoefModifier = twidCoefModifier << 1u;
} // stages loop end
n1 = n2;
n2 = n2 >> 1;
ia = 0;
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = 0; i < fftLen; i += n1)
{
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
pSrc[2u * l] = xt;
pSrc[2u * l + 1u] = yt;
} // groups loop end
#endif // #ifndef ARM_MATH_CM0_FAMILY
}

View File

@ -0,0 +1,350 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2013 ARM Limited. All rights reserved.
*
* $Date: 17. January 2013
* $Revision: V1.4.1
*
* Project: CMSIS DSP Library
* Title: arm_cfft_radix2_q31.c
*
* Description: Radix-2 Decimation in Frequency CFFT & CIFFT Fixed point processing function
*
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
void arm_radix2_butterfly_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pCoef,
uint16_t twidCoefModifier);
void arm_radix2_butterfly_inverse_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pCoef,
uint16_t twidCoefModifier);
void arm_bitreversal_q31(
q31_t * pSrc,
uint32_t fftLen,
uint16_t bitRevFactor,
uint16_t * pBitRevTab);
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
* @details
* @brief Processing function for the fixed-point CFFT/CIFFT.
* @param[in] *S points to an instance of the fixed-point CFFT/CIFFT structure.
* @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
* @return none.
*/
void arm_cfft_radix2_q31(
const arm_cfft_radix2_instance_q31 * S,
q31_t * pSrc)
{
if(S->ifftFlag == 1u)
{
arm_radix2_butterfly_inverse_q31(pSrc, S->fftLen,
S->pTwiddle, S->twidCoefModifier);
}
else
{
arm_radix2_butterfly_q31(pSrc, S->fftLen,
S->pTwiddle, S->twidCoefModifier);
}
arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
}
/**
* @} end of ComplexFFT group
*/
void arm_radix2_butterfly_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pCoef,
uint16_t twidCoefModifier)
{
unsigned i, j, k, l, m;
unsigned n1, n2, ia;
q31_t xt, yt, cosVal, sinVal;
q31_t p0, p1;
//N = fftLen;
n2 = fftLen;
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (i = 0; i < n2; i++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
l = i + n2;
xt = (pSrc[2 * i] >> 2u) - (pSrc[2 * l] >> 2u);
pSrc[2 * i] = ((pSrc[2 * i] >> 2u) + (pSrc[2 * l] >> 2u)) >> 1u;
yt = (pSrc[2 * i + 1] >> 2u) - (pSrc[2 * l + 1] >> 2u);
pSrc[2 * i + 1] =
((pSrc[2 * l + 1] >> 2u) + (pSrc[2 * i + 1] >> 2u)) >> 1u;
mult_32x32_keep32_R(p0, xt, cosVal);
mult_32x32_keep32_R(p1, yt, cosVal);
multAcc_32x32_keep32_R(p0, yt, sinVal);
multSub_32x32_keep32_R(p1, xt, sinVal);
pSrc[2u * l] = p0;
pSrc[2u * l + 1u] = p1;
} // groups loop end
twidCoefModifier <<= 1u;
// loop for stage
for (k = fftLen / 2; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (j = 0; j < n2; j++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
i = j;
m = fftLen / n1;
do
{
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) >> 1u;
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1u;
mult_32x32_keep32_R(p0, xt, cosVal);
mult_32x32_keep32_R(p1, yt, cosVal);
multAcc_32x32_keep32_R(p0, yt, sinVal);
multSub_32x32_keep32_R(p1, xt, sinVal);
pSrc[2u * l] = p0;
pSrc[2u * l + 1u] = p1;
i += n1;
m--;
} while( m > 0); // butterfly loop end
} // groups loop end
twidCoefModifier <<= 1u;
} // stages loop end
n1 = n2;
n2 = n2 >> 1;
ia = 0;
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = 0; i < fftLen; i += n1)
{
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
pSrc[2u * l] = xt;
pSrc[2u * l + 1u] = yt;
i += n1;
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
pSrc[2u * l] = xt;
pSrc[2u * l + 1u] = yt;
} // butterfly loop end
}
void arm_radix2_butterfly_inverse_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pCoef,
uint16_t twidCoefModifier)
{
unsigned i, j, k, l;
unsigned n1, n2, ia;
q31_t xt, yt, cosVal, sinVal;
q31_t p0, p1;
//N = fftLen;
n2 = fftLen;
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (i = 0; i < n2; i++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
l = i + n2;
xt = (pSrc[2 * i] >> 2u) - (pSrc[2 * l] >> 2u);
pSrc[2 * i] = ((pSrc[2 * i] >> 2u) + (pSrc[2 * l] >> 2u)) >> 1u;
yt = (pSrc[2 * i + 1] >> 2u) - (pSrc[2 * l + 1] >> 2u);
pSrc[2 * i + 1] =
((pSrc[2 * l + 1] >> 2u) + (pSrc[2 * i + 1] >> 2u)) >> 1u;
mult_32x32_keep32_R(p0, xt, cosVal);
mult_32x32_keep32_R(p1, yt, cosVal);
multSub_32x32_keep32_R(p0, yt, sinVal);
multAcc_32x32_keep32_R(p1, xt, sinVal);
pSrc[2u * l] = p0;
pSrc[2u * l + 1u] = p1;
} // groups loop end
twidCoefModifier = twidCoefModifier << 1u;
// loop for stage
for (k = fftLen / 2; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (j = 0; j < n2; j++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) >> 1u;
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1u;
mult_32x32_keep32_R(p0, xt, cosVal);
mult_32x32_keep32_R(p1, yt, cosVal);
multSub_32x32_keep32_R(p0, yt, sinVal);
multAcc_32x32_keep32_R(p1, xt, sinVal);
pSrc[2u * l] = p0;
pSrc[2u * l + 1u] = p1;
} // butterfly loop end
} // groups loop end
twidCoefModifier = twidCoefModifier << 1u;
} // stages loop end
n1 = n2;
n2 = n2 >> 1;
ia = 0;
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = 0; i < fftLen; i += n1)
{
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
pSrc[2u * l] = xt;
pSrc[2u * l + 1u] = yt;
i += n1;
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
pSrc[2u * l] = xt;
pSrc[2u * l + 1u] = yt;
} // butterfly loop end
}

View File

@ -0,0 +1,165 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2013 ARM Limited. All rights reserved.
*
* $Date: 17. January 2013
* $Revision: V1.4.1
*
* Project: CMSIS DSP Library
* Title: arm_cfft_radix4_init_f32.c
*
* Description: Radix-4 Decimation in Frequency Floating-point CFFT & CIFFT Initialization function
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
#include "arm_common_tables.h"
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
* @brief Initialization function for the floating-point CFFT/CIFFT.
* @deprecated Do not use this function. It has been superceded by \ref arm_cfft_f32 and will be removed
* in the future.
* @param[in,out] *S points to an instance of the floating-point CFFT/CIFFT structure.
* @param[in] fftLen length of the FFT.
* @param[in] ifftFlag flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
* @param[in] bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
* @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
*
* \par Description:
* \par
* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
* Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
* \par
* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
* \par
* The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
* \par
* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
*/
arm_status arm_cfft_radix4_init_f32(
arm_cfft_radix4_instance_f32 * S,
uint16_t fftLen,
uint8_t ifftFlag,
uint8_t bitReverseFlag)
{
/* Initialise the default arm status */
arm_status status = ARM_MATH_SUCCESS;
/* Initialise the FFT length */
S->fftLen = fftLen;
/* Initialise the Twiddle coefficient pointer */
S->pTwiddle = (float32_t *) twiddleCoef;
/* Initialise the Flag for selection of CFFT or CIFFT */
S->ifftFlag = ifftFlag;
/* Initialise the Flag for calculation Bit reversal or not */
S->bitReverseFlag = bitReverseFlag;
/* Initializations of structure parameters depending on the FFT length */
switch (S->fftLen)
{
case 4096u:
/* Initializations of structure parameters for 4096 point FFT */
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 1u;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 1u;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) armBitRevTable;
/* Initialise the 1/fftLen Value */
S->onebyfftLen = 0.000244140625;
break;
case 1024u:
/* Initializations of structure parameters for 1024 point FFT */
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 4u;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 4u;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
/* Initialise the 1/fftLen Value */
S->onebyfftLen = 0.0009765625f;
break;
case 256u:
/* Initializations of structure parameters for 256 point FFT */
S->twidCoefModifier = 16u;
S->bitRevFactor = 16u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
S->onebyfftLen = 0.00390625f;
break;
case 64u:
/* Initializations of structure parameters for 64 point FFT */
S->twidCoefModifier = 64u;
S->bitRevFactor = 64u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
S->onebyfftLen = 0.015625f;
break;
case 16u:
/* Initializations of structure parameters for 16 point FFT */
S->twidCoefModifier = 256u;
S->bitRevFactor = 256u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
S->onebyfftLen = 0.0625f;
break;
default:
/* Reporting argument error if fftSize is not valid value */
status = ARM_MATH_ARGUMENT_ERROR;
break;
}
return (status);
}
/**
* @} end of ComplexFFT group
*/

View File

@ -0,0 +1,151 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2013 ARM Limited. All rights reserved.
*
* $Date: 17. January 2013
* $Revision: V1.4.1
*
* Project: CMSIS DSP Library
* Title: arm_cfft_radix4_init_q15.c
*
* Description: Radix-4 Decimation in Frequency Q15 FFT & IFFT initialization function
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
#include "arm_common_tables.h"
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
* @brief Initialization function for the Q15 CFFT/CIFFT.
* @param[in,out] *S points to an instance of the Q15 CFFT/CIFFT structure.
* @param[in] fftLen length of the FFT.
* @param[in] ifftFlag flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
* @param[in] bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
* @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
*
* \par Description:
* \par
* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
* Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
* \par
* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
* \par
* The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
* \par
* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
*/
arm_status arm_cfft_radix4_init_q15(
arm_cfft_radix4_instance_q15 * S,
uint16_t fftLen,
uint8_t ifftFlag,
uint8_t bitReverseFlag)
{
/* Initialise the default arm status */
arm_status status = ARM_MATH_SUCCESS;
/* Initialise the FFT length */
S->fftLen = fftLen;
/* Initialise the Twiddle coefficient pointer */
S->pTwiddle = (q15_t *) twiddleCoefQ15;
/* Initialise the Flag for selection of CFFT or CIFFT */
S->ifftFlag = ifftFlag;
/* Initialise the Flag for calculation Bit reversal or not */
S->bitReverseFlag = bitReverseFlag;
/* Initializations of structure parameters depending on the FFT length */
switch (S->fftLen)
{
case 4096u:
/* Initializations of structure parameters for 4096 point FFT */
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 1u;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 1u;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) armBitRevTable;
break;
case 1024u:
/* Initializations of structure parameters for 1024 point FFT */
S->twidCoefModifier = 4u;
S->bitRevFactor = 4u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
break;
case 256u:
/* Initializations of structure parameters for 256 point FFT */
S->twidCoefModifier = 16u;
S->bitRevFactor = 16u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
break;
case 64u:
/* Initializations of structure parameters for 64 point FFT */
S->twidCoefModifier = 64u;
S->bitRevFactor = 64u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
break;
case 16u:
/* Initializations of structure parameters for 16 point FFT */
S->twidCoefModifier = 256u;
S->bitRevFactor = 256u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
break;
default:
/* Reporting argument error if fftSize is not valid value */
status = ARM_MATH_ARGUMENT_ERROR;
break;
}
return (status);
}
/**
* @} end of ComplexFFT group
*/

View File

@ -0,0 +1,147 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2013 ARM Limited. All rights reserved.
*
* $Date: 17. January 2013
* $Revision: V1.4.1
*
* Project: CMSIS DSP Library
* Title: arm_cfft_radix4_init_q31.c
*
* Description: Radix-4 Decimation in Frequency Q31 FFT & IFFT initialization function
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
#include "arm_common_tables.h"
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
*
* @brief Initialization function for the Q31 CFFT/CIFFT.
* @param[in,out] *S points to an instance of the Q31 CFFT/CIFFT structure.
* @param[in] fftLen length of the FFT.
* @param[in] ifftFlag flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
* @param[in] bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
* @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
*
* \par Description:
* \par
* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
* Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
* \par
* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
* \par
* The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
* \par
* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
*/
arm_status arm_cfft_radix4_init_q31(
arm_cfft_radix4_instance_q31 * S,
uint16_t fftLen,
uint8_t ifftFlag,
uint8_t bitReverseFlag)
{
/* Initialise the default arm status */
arm_status status = ARM_MATH_SUCCESS;
/* Initialise the FFT length */
S->fftLen = fftLen;
/* Initialise the Twiddle coefficient pointer */
S->pTwiddle = (q31_t *) twiddleCoefQ31;
/* Initialise the Flag for selection of CFFT or CIFFT */
S->ifftFlag = ifftFlag;
/* Initialise the Flag for calculation Bit reversal or not */
S->bitReverseFlag = bitReverseFlag;
/* Initializations of Instance structure depending on the FFT length */
switch (S->fftLen)
{
/* Initializations of structure parameters for 4096 point FFT */
case 4096u:
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 1u;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 1u;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) armBitRevTable;
break;
/* Initializations of structure parameters for 1024 point FFT */
case 1024u:
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 4u;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 4u;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
break;
case 256u:
/* Initializations of structure parameters for 256 point FFT */
S->twidCoefModifier = 16u;
S->bitRevFactor = 16u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
break;
case 64u:
/* Initializations of structure parameters for 64 point FFT */
S->twidCoefModifier = 64u;
S->bitRevFactor = 64u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
break;
case 16u:
/* Initializations of structure parameters for 16 point FFT */
S->twidCoefModifier = 256u;
S->bitRevFactor = 256u;
S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
break;
default:
/* Reporting argument error if fftSize is not valid value */
status = ARM_MATH_ARGUMENT_ERROR;
break;
}
return (status);
}
/**
* @} end of ComplexFFT group
*/

View File

@ -0,0 +1,911 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2013 ARM Limited. All rights reserved.
*
* $Date: 17. January 2013
* $Revision: V1.4.1
*
* Project: CMSIS DSP Library
* Title: arm_cfft_radix4_q31.c
*
* Description: This file has function definition of Radix-4 FFT & IFFT function and
* In-place bit reversal using bit reversal table
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
void arm_radix4_butterfly_inverse_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pCoef,
uint32_t twidCoefModifier);
void arm_radix4_butterfly_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pCoef,
uint32_t twidCoefModifier);
void arm_bitreversal_q31(
q31_t * pSrc,
uint32_t fftLen,
uint16_t bitRevFactor,
uint16_t * pBitRevTab);
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
* @details
* @brief Processing function for the Q31 CFFT/CIFFT.
* @param[in] *S points to an instance of the Q31 CFFT/CIFFT structure.
* @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
* @return none.
*
* \par Input and output formats:
* \par
* Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
* Hence the output format is different for different FFT sizes.
* The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:
* \par
* \image html CFFTQ31.gif "Input and Output Formats for Q31 CFFT"
* \image html CIFFTQ31.gif "Input and Output Formats for Q31 CIFFT"
*
*/
void arm_cfft_radix4_q31(
const arm_cfft_radix4_instance_q31 * S,
q31_t * pSrc)
{
if(S->ifftFlag == 1u)
{
/* Complex IFFT radix-4 */
arm_radix4_butterfly_inverse_q31(pSrc, S->fftLen, S->pTwiddle,
S->twidCoefModifier);
}
else
{
/* Complex FFT radix-4 */
arm_radix4_butterfly_q31(pSrc, S->fftLen, S->pTwiddle,
S->twidCoefModifier);
}
if(S->bitReverseFlag == 1u)
{
/* Bit Reversal */
arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
}
}
/**
* @} end of ComplexFFT group
*/
/*
* Radix-4 FFT algorithm used is :
*
* Input real and imaginary data:
* x(n) = xa + j * ya
* x(n+N/4 ) = xb + j * yb
* x(n+N/2 ) = xc + j * yc
* x(n+3N 4) = xd + j * yd
*
*
* Output real and imaginary data:
* x(4r) = xa'+ j * ya'
* x(4r+1) = xb'+ j * yb'
* x(4r+2) = xc'+ j * yc'
* x(4r+3) = xd'+ j * yd'
*
*
* Twiddle factors for radix-4 FFT:
* Wn = co1 + j * (- si1)
* W2n = co2 + j * (- si2)
* W3n = co3 + j * (- si3)
*
* Butterfly implementation:
* xa' = xa + xb + xc + xd
* ya' = ya + yb + yc + yd
* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
*
*/
/**
* @brief Core function for the Q31 CFFT butterfly process.
* @param[in, out] *pSrc points to the in-place buffer of Q31 data type.
* @param[in] fftLen length of the FFT.
* @param[in] *pCoef points to twiddle coefficient buffer.
* @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
* @return none.
*/
void arm_radix4_butterfly_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pCoef,
uint32_t twidCoefModifier)
{
uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
q31_t xa, xb, xc, xd;
q31_t ya, yb, yc, yd;
q31_t xa_out, xb_out, xc_out, xd_out;
q31_t ya_out, yb_out, yc_out, yd_out;
q31_t *ptr1;
q63_t xaya, xbyb, xcyc, xdyd;
/* Total process is divided into three stages */
/* process first stage, middle stages, & last stage */
/* start of first stage process */
/* Initializations for the first stage */
n2 = fftLen;
n1 = n2;
/* n2 = fftLen/4 */
n2 >>= 2u;
i0 = 0u;
ia1 = 0u;
j = n2;
/* Calculation of first stage */
do
{
/* index calculation for the input as, */
/* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
i1 = i0 + n2;
i2 = i1 + n2;
i3 = i2 + n2;
/* input is in 1.31(q31) format and provide 4 guard bits for the input */
/* Butterfly implementation */
/* xa + xc */
r1 = (pSrc[(2u * i0)] >> 4u) + (pSrc[(2u * i2)] >> 4u);
/* xa - xc */
r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u);
/* xb + xd */
t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u);
/* ya + yc */
s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u);
/* ya - yc */
s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u);
/* xa' = xa + xb + xc + xd */
pSrc[2u * i0] = (r1 + t1);
/* (xa + xc) - (xb + xd) */
r1 = r1 - t1;
/* yb + yd */
t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u);
/* ya' = ya + yb + yc + yd */
pSrc[(2u * i0) + 1u] = (s1 + t2);
/* (ya + yc) - (yb + yd) */
s1 = s1 - t2;
/* yb - yd */
t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u);
/* xb - xd */
t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u);
/* index calculation for the coefficients */
ia2 = 2u * ia1;
co2 = pCoef[ia2 * 2u];
si2 = pCoef[(ia2 * 2u) + 1u];
/* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u;
/* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u;
/* (xa - xc) + (yb - yd) */
r1 = r2 + t1;
/* (xa - xc) - (yb - yd) */
r2 = r2 - t1;
/* (ya - yc) - (xb - xd) */
s1 = s2 - t2;
/* (ya - yc) + (xb - xd) */
s2 = s2 + t2;
co1 = pCoef[ia1 * 2u];
si1 = pCoef[(ia1 * 2u) + 1u];
/* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u;
/* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u;
/* index calculation for the coefficients */
ia3 = 3u * ia1;
co3 = pCoef[ia3 * 2u];
si3 = pCoef[(ia3 * 2u) + 1u];
/* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u;
/* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u;
/* Twiddle coefficients index modifier */
ia1 = ia1 + twidCoefModifier;
/* Updating input index */
i0 = i0 + 1u;
} while(--j);
/* end of first stage process */
/* data is in 5.27(q27) format */
/* start of Middle stages process */
/* each stage in middle stages provides two down scaling of the input */
twidCoefModifier <<= 2u;
for (k = fftLen / 4u; k > 4u; k >>= 2u)
{
/* Initializations for the first stage */
n1 = n2;
n2 >>= 2u;
ia1 = 0u;
/* Calculation of first stage */
for (j = 0u; j <= (n2 - 1u); j++)
{
/* index calculation for the coefficients */
ia2 = ia1 + ia1;
ia3 = ia2 + ia1;
co1 = pCoef[ia1 * 2u];
si1 = pCoef[(ia1 * 2u) + 1u];
co2 = pCoef[ia2 * 2u];
si2 = pCoef[(ia2 * 2u) + 1u];
co3 = pCoef[ia3 * 2u];
si3 = pCoef[(ia3 * 2u) + 1u];
/* Twiddle coefficients index modifier */
ia1 = ia1 + twidCoefModifier;
for (i0 = j; i0 < fftLen; i0 += n1)
{
/* index calculation for the input as, */
/* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
i1 = i0 + n2;
i2 = i1 + n2;
i3 = i2 + n2;
/* Butterfly implementation */
/* xa + xc */
r1 = pSrc[2u * i0] + pSrc[2u * i2];
/* xa - xc */
r2 = pSrc[2u * i0] - pSrc[2u * i2];
/* ya + yc */
s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u];
/* ya - yc */
s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u];
/* xb + xd */
t1 = pSrc[2u * i1] + pSrc[2u * i3];
/* xa' = xa + xb + xc + xd */
pSrc[2u * i0] = (r1 + t1) >> 2u;
/* xa + xc -(xb + xd) */
r1 = r1 - t1;
/* yb + yd */
t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u];
/* ya' = ya + yb + yc + yd */
pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u;
/* (ya + yc) - (yb + yd) */
s1 = s1 - t2;
/* (yb - yd) */
t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u];
/* (xb - xd) */
t2 = pSrc[2u * i1] - pSrc[2u * i3];
/* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1u;
/* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1u;
/* (xa - xc) + (yb - yd) */
r1 = r2 + t1;
/* (xa - xc) - (yb - yd) */
r2 = r2 - t1;
/* (ya - yc) - (xb - xd) */
s1 = s2 - t2;
/* (ya - yc) + (xb - xd) */
s2 = s2 + t2;
/* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u;
/* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u;
/* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u;
/* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u;
}
}
twidCoefModifier <<= 2u;
}
/* End of Middle stages process */
/* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
/* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
/* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
/* data is in 5.27(q27) format for the 16 point as there are no middle stages */
/* start of Last stage process */
/* Initializations for the last stage */
j = fftLen >> 2;
ptr1 = &pSrc[0];
/* Calculations of last stage */
do
{
#ifndef ARM_MATH_BIG_ENDIAN
/* Read xa (real), ya(imag) input */
xaya = *__SIMD64(ptr1)++;
xa = (q31_t) xaya;
ya = (q31_t) (xaya >> 32);
/* Read xb (real), yb(imag) input */
xbyb = *__SIMD64(ptr1)++;
xb = (q31_t) xbyb;
yb = (q31_t) (xbyb >> 32);
/* Read xc (real), yc(imag) input */
xcyc = *__SIMD64(ptr1)++;
xc = (q31_t) xcyc;
yc = (q31_t) (xcyc >> 32);
/* Read xc (real), yc(imag) input */
xdyd = *__SIMD64(ptr1)++;
xd = (q31_t) xdyd;
yd = (q31_t) (xdyd >> 32);
#else
/* Read xa (real), ya(imag) input */
xaya = *__SIMD64(ptr1)++;
ya = (q31_t) xaya;
xa = (q31_t) (xaya >> 32);
/* Read xb (real), yb(imag) input */
xbyb = *__SIMD64(ptr1)++;
yb = (q31_t) xbyb;
xb = (q31_t) (xbyb >> 32);
/* Read xc (real), yc(imag) input */
xcyc = *__SIMD64(ptr1)++;
yc = (q31_t) xcyc;
xc = (q31_t) (xcyc >> 32);
/* Read xc (real), yc(imag) input */
xdyd = *__SIMD64(ptr1)++;
yd = (q31_t) xdyd;
xd = (q31_t) (xdyd >> 32);
#endif
/* xa' = xa + xb + xc + xd */
xa_out = xa + xb + xc + xd;
/* ya' = ya + yb + yc + yd */
ya_out = ya + yb + yc + yd;
/* pointer updation for writing */
ptr1 = ptr1 - 8u;
/* writing xa' and ya' */
*ptr1++ = xa_out;
*ptr1++ = ya_out;
xc_out = (xa - xb + xc - xd);
yc_out = (ya - yb + yc - yd);
/* writing xc' and yc' */
*ptr1++ = xc_out;
*ptr1++ = yc_out;
xb_out = (xa + yb - xc - yd);
yb_out = (ya - xb - yc + xd);
/* writing xb' and yb' */
*ptr1++ = xb_out;
*ptr1++ = yb_out;
xd_out = (xa - yb - xc + yd);
yd_out = (ya + xb - yc - xd);
/* writing xd' and yd' */
*ptr1++ = xd_out;
*ptr1++ = yd_out;
} while(--j);
/* output is in 11.21(q21) format for the 1024 point */
/* output is in 9.23(q23) format for the 256 point */
/* output is in 7.25(q25) format for the 64 point */
/* output is in 5.27(q27) format for the 16 point */
/* End of last stage process */
}
/**
* @brief Core function for the Q31 CIFFT butterfly process.
* @param[in, out] *pSrc points to the in-place buffer of Q31 data type.
* @param[in] fftLen length of the FFT.
* @param[in] *pCoef points to twiddle coefficient buffer.
* @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
* @return none.
*/
/*
* Radix-4 IFFT algorithm used is :
*
* CIFFT uses same twiddle coefficients as CFFT Function
* x[k] = x[n] + (j)k * x[n + fftLen/4] + (-1)k * x[n+fftLen/2] + (-j)k * x[n+3*fftLen/4]
*
*
* IFFT is implemented with following changes in equations from FFT
*
* Input real and imaginary data:
* x(n) = xa + j * ya
* x(n+N/4 ) = xb + j * yb
* x(n+N/2 ) = xc + j * yc
* x(n+3N 4) = xd + j * yd
*
*
* Output real and imaginary data:
* x(4r) = xa'+ j * ya'
* x(4r+1) = xb'+ j * yb'
* x(4r+2) = xc'+ j * yc'
* x(4r+3) = xd'+ j * yd'
*
*
* Twiddle factors for radix-4 IFFT:
* Wn = co1 + j * (si1)
* W2n = co2 + j * (si2)
* W3n = co3 + j * (si3)
* The real and imaginary output values for the radix-4 butterfly are
* xa' = xa + xb + xc + xd
* ya' = ya + yb + yc + yd
* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)
* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)
* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)
* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)
* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)
* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)
*
*/
void arm_radix4_butterfly_inverse_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pCoef,
uint32_t twidCoefModifier)
{
uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
q31_t xa, xb, xc, xd;
q31_t ya, yb, yc, yd;
q31_t xa_out, xb_out, xc_out, xd_out;
q31_t ya_out, yb_out, yc_out, yd_out;
q31_t *ptr1;
q63_t xaya, xbyb, xcyc, xdyd;
/* input is be 1.31(q31) format for all FFT sizes */
/* Total process is divided into three stages */
/* process first stage, middle stages, & last stage */
/* Start of first stage process */
/* Initializations for the first stage */
n2 = fftLen;
n1 = n2;
/* n2 = fftLen/4 */
n2 >>= 2u;
i0 = 0u;
ia1 = 0u;
j = n2;
do
{
/* input is in 1.31(q31) format and provide 4 guard bits for the input */
/* index calculation for the input as, */
/* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
i1 = i0 + n2;
i2 = i1 + n2;
i3 = i2 + n2;
/* Butterfly implementation */
/* xa + xc */
r1 = (pSrc[2u * i0] >> 4u) + (pSrc[2u * i2] >> 4u);
/* xa - xc */
r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u);
/* xb + xd */
t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u);
/* ya + yc */
s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u);
/* ya - yc */
s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u);
/* xa' = xa + xb + xc + xd */
pSrc[2u * i0] = (r1 + t1);
/* (xa + xc) - (xb + xd) */
r1 = r1 - t1;
/* yb + yd */
t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u);
/* ya' = ya + yb + yc + yd */
pSrc[(2u * i0) + 1u] = (s1 + t2);
/* (ya + yc) - (yb + yd) */
s1 = s1 - t2;
/* yb - yd */
t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u);
/* xb - xd */
t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u);
/* index calculation for the coefficients */
ia2 = 2u * ia1;
co2 = pCoef[ia2 * 2u];
si2 = pCoef[(ia2 * 2u) + 1u];
/* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) -
((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u;
/* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
pSrc[2u * i1 + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) +
((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u;
/* (xa - xc) - (yb - yd) */
r1 = r2 - t1;
/* (xa - xc) + (yb - yd) */
r2 = r2 + t1;
/* (ya - yc) + (xb - xd) */
s1 = s2 + t2;
/* (ya - yc) - (xb - xd) */
s2 = s2 - t2;
co1 = pCoef[ia1 * 2u];
si1 = pCoef[(ia1 * 2u) + 1u];
/* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u;
/* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u;
/* index calculation for the coefficients */
ia3 = 3u * ia1;
co3 = pCoef[ia3 * 2u];
si3 = pCoef[(ia3 * 2u) + 1u];
/* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u;
/* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u;
/* Twiddle coefficients index modifier */
ia1 = ia1 + twidCoefModifier;
/* Updating input index */
i0 = i0 + 1u;
} while(--j);
/* data is in 5.27(q27) format */
/* each stage provides two down scaling of the input */
/* Start of Middle stages process */
twidCoefModifier <<= 2u;
/* Calculation of second stage to excluding last stage */
for (k = fftLen / 4u; k > 4u; k >>= 2u)
{
/* Initializations for the first stage */
n1 = n2;
n2 >>= 2u;
ia1 = 0u;
for (j = 0; j <= (n2 - 1u); j++)
{
/* index calculation for the coefficients */
ia2 = ia1 + ia1;
ia3 = ia2 + ia1;
co1 = pCoef[ia1 * 2u];
si1 = pCoef[(ia1 * 2u) + 1u];
co2 = pCoef[ia2 * 2u];
si2 = pCoef[(ia2 * 2u) + 1u];
co3 = pCoef[ia3 * 2u];
si3 = pCoef[(ia3 * 2u) + 1u];
/* Twiddle coefficients index modifier */
ia1 = ia1 + twidCoefModifier;
for (i0 = j; i0 < fftLen; i0 += n1)
{
/* index calculation for the input as, */
/* pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
i1 = i0 + n2;
i2 = i1 + n2;
i3 = i2 + n2;
/* Butterfly implementation */
/* xa + xc */
r1 = pSrc[2u * i0] + pSrc[2u * i2];
/* xa - xc */
r2 = pSrc[2u * i0] - pSrc[2u * i2];
/* ya + yc */
s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u];
/* ya - yc */
s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u];
/* xb + xd */
t1 = pSrc[2u * i1] + pSrc[2u * i3];
/* xa' = xa + xb + xc + xd */
pSrc[2u * i0] = (r1 + t1) >> 2u;
/* xa + xc -(xb + xd) */
r1 = r1 - t1;
/* yb + yd */
t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u];
/* ya' = ya + yb + yc + yd */
pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u;
/* (ya + yc) - (yb + yd) */
s1 = s1 - t2;
/* (yb - yd) */
t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u];
/* (xb - xd) */
t2 = pSrc[2u * i1] - pSrc[2u * i3];
/* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32u)) -
((int32_t) (((q63_t) s1 * si2) >> 32u))) >> 1u;
/* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
pSrc[(2u * i1) + 1u] =
(((int32_t) (((q63_t) s1 * co2) >> 32u)) +
((int32_t) (((q63_t) r1 * si2) >> 32u))) >> 1u;
/* (xa - xc) - (yb - yd) */
r1 = r2 - t1;
/* (xa - xc) + (yb - yd) */
r2 = r2 + t1;
/* (ya - yc) + (xb - xd) */
s1 = s2 + t2;
/* (ya - yc) - (xb - xd) */
s2 = s2 - t2;
/* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u;
/* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u;
/* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
pSrc[(2u * i3)] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u;
/* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u;
}
}
twidCoefModifier <<= 2u;
}
/* End of Middle stages process */
/* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
/* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
/* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
/* data is in 5.27(q27) format for the 16 point as there are no middle stages */
/* Start of last stage process */
/* Initializations for the last stage */
j = fftLen >> 2;
ptr1 = &pSrc[0];
/* Calculations of last stage */
do
{
#ifndef ARM_MATH_BIG_ENDIAN
/* Read xa (real), ya(imag) input */
xaya = *__SIMD64(ptr1)++;
xa = (q31_t) xaya;
ya = (q31_t) (xaya >> 32);
/* Read xb (real), yb(imag) input */
xbyb = *__SIMD64(ptr1)++;
xb = (q31_t) xbyb;
yb = (q31_t) (xbyb >> 32);
/* Read xc (real), yc(imag) input */
xcyc = *__SIMD64(ptr1)++;
xc = (q31_t) xcyc;
yc = (q31_t) (xcyc >> 32);
/* Read xc (real), yc(imag) input */
xdyd = *__SIMD64(ptr1)++;
xd = (q31_t) xdyd;
yd = (q31_t) (xdyd >> 32);
#else
/* Read xa (real), ya(imag) input */
xaya = *__SIMD64(ptr1)++;
ya = (q31_t) xaya;
xa = (q31_t) (xaya >> 32);
/* Read xb (real), yb(imag) input */
xbyb = *__SIMD64(ptr1)++;
yb = (q31_t) xbyb;
xb = (q31_t) (xbyb >> 32);
/* Read xc (real), yc(imag) input */
xcyc = *__SIMD64(ptr1)++;
yc = (q31_t) xcyc;
xc = (q31_t) (xcyc >> 32);
/* Read xc (real), yc(imag) input */
xdyd = *__SIMD64(ptr1)++;
yd = (q31_t) xdyd;
xd = (q31_t) (xdyd >> 32);
#endif
/* xa' = xa + xb + xc + xd */
xa_out = xa + xb + xc + xd;
/* ya' = ya + yb + yc + yd */
ya_out = ya + yb + yc + yd;
/* pointer updation for writing */
ptr1 = ptr1 - 8u;
/* writing xa' and ya' */
*ptr1++ = xa_out;
*ptr1++ = ya_out;
xc_out = (xa - xb + xc - xd);
yc_out = (ya - yb + yc - yd);
/* writing xc' and yc' */
*ptr1++ = xc_out;
*ptr1++ = yc_out;
xb_out = (xa - yb - xc + yd);
yb_out = (ya + xb - yc - xd);
/* writing xb' and yb' */
*ptr1++ = xb_out;
*ptr1++ = yb_out;
xd_out = (xa + yb - xc - yd);
yd_out = (ya - xb - yc + xd);
/* writing xd' and yd' */
*ptr1++ = xd_out;
*ptr1++ = yd_out;
} while(--j);
/* output is in 11.21(q21) format for the 1024 point */
/* output is in 9.23(q23) format for the 256 point */
/* output is in 7.25(q25) format for the 64 point */
/* output is in 5.27(q27) format for the 16 point */
/* End of last stage process */
}

View File

@ -0,0 +1,384 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2013 ARM Limited. All rights reserved.
*
* $Date: 17. January 2013
* $Revision: V1.4.1
*
* Project: CMSIS DSP Library
* Title: arm_cfft_radix8_f32.c
*
* Description: Radix-8 Decimation in Frequency CFFT & CIFFT Floating point processing function
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupTransforms
*/
/**
* @defgroup Radix8_CFFT_CIFFT Radix-8 Complex FFT Functions
*
* \par
* Complex Fast Fourier Transform(CFFT) and Complex Inverse Fast Fourier Transform(CIFFT) is an efficient algorithm to compute Discrete Fourier Transform(DFT) and Inverse Discrete Fourier Transform(IDFT).
* Computational complexity of CFFT reduces drastically when compared to DFT.
* \par
* This set of functions implements CFFT/CIFFT
* for floating-point data types. The functions operates on in-place buffer which uses same buffer for input and output.
* Complex input is stored in input buffer in an interleaved fashion.
*
* \par
* The functions operate on blocks of input and output data and each call to the function processes
* <code>2*fftLen</code> samples through the transform. <code>pSrc</code> points to In-place arrays containing <code>2*fftLen</code> values.
* \par
* The <code>pSrc</code> points to the array of in-place buffer of size <code>2*fftLen</code> and inputs and outputs are stored in an interleaved fashion as shown below.
* <pre> {real[0], imag[0], real[1], imag[1],..} </pre>
*
* \par Lengths supported by the transform:
* \par
* Internally, the function utilize a Radix-8 decimation in frequency(DIF) algorithm
* and the size of the FFT supported are of the lengths [ 64, 512, 4096].
*
*
* \par Algorithm:
*
* <b>Complex Fast Fourier Transform:</b>
* \par
* Input real and imaginary data:
* <pre>
* x(n) = xa + j * ya
* x(n+N/4 ) = xb + j * yb
* x(n+N/2 ) = xc + j * yc
* x(n+3N 4) = xd + j * yd
* </pre>
* where N is length of FFT
* \par
* Output real and imaginary data:
* <pre>
* X(4r) = xa'+ j * ya'
* X(4r+1) = xb'+ j * yb'
* X(4r+2) = xc'+ j * yc'
* X(4r+3) = xd'+ j * yd'
* </pre>
* \par
* Twiddle factors for Radix-8 FFT:
* <pre>
* Wn = co1 + j * (- si1)
* W2n = co2 + j * (- si2)
* W3n = co3 + j * (- si3)
* </pre>
*
* \par
* \image html CFFT.gif "Radix-8 Decimation-in Frequency Complex Fast Fourier Transform"
*
* \par
* Output from Radix-8 CFFT Results in Digit reversal order. Interchange middle two branches of every butterfly results in Bit reversed output.
* \par
* <b> Butterfly CFFT equations:</b>
* <pre>
* xa' = xa + xb + xc + xd
* ya' = ya + yb + yc + yd
* xc' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)
* yc' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)
* xb' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)
* yb' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)
* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)
* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)
* </pre>
*
* \par
* where <code>fftLen</code> length of CFFT/CIFFT; <code>ifftFlag</code> Flag for selection of CFFT or CIFFT(Set ifftFlag to calculate CIFFT otherwise calculates CFFT);
* <code>bitReverseFlag</code> Flag for selection of output order(Set bitReverseFlag to output in normal order otherwise output in bit reversed order);
* <code>pTwiddle</code>points to array of twiddle coefficients; <code>pBitRevTable</code> points to the array of bit reversal table.
* <code>twidCoefModifier</code> modifier for twiddle factor table which supports all FFT lengths with same table;
* <code>pBitRevTable</code> modifier for bit reversal table which supports all FFT lengths with same table.
* <code>onebyfftLen</code> value of 1/fftLen to calculate CIFFT;
*
* \par Fixed-Point Behavior
* Care must be taken when using the fixed-point versions of the CFFT/CIFFT function.
* Refer to the function specific documentation below for usage guidelines.
*/
/*
* @brief Core function for the floating-point CFFT butterfly process.
* @param[in, out] *pSrc points to the in-place buffer of floating-point data type.
* @param[in] fftLen length of the FFT.
* @param[in] *pCoef points to the twiddle coefficient buffer.
* @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
* @return none.
*/
void arm_radix8_butterfly_f32(
float32_t * pSrc,
uint16_t fftLen,
const float32_t * pCoef,
uint16_t twidCoefModifier)
{
uint32_t ia1, ia2, ia3, ia4, ia5, ia6, ia7;
uint32_t i1, i2, i3, i4, i5, i6, i7, i8;
uint32_t id;
uint32_t n1, n2, j;
float32_t r1, r2, r3, r4, r5, r6, r7, r8;
float32_t t1, t2;
float32_t s1, s2, s3, s4, s5, s6, s7, s8;
float32_t p1, p2, p3, p4;
float32_t co2, co3, co4, co5, co6, co7, co8;
float32_t si2, si3, si4, si5, si6, si7, si8;
const float32_t C81 = 0.70710678118f;
n2 = fftLen;
do
{
n1 = n2;
n2 = n2 >> 3;
i1 = 0;
do
{
i2 = i1 + n2;
i3 = i2 + n2;
i4 = i3 + n2;
i5 = i4 + n2;
i6 = i5 + n2;
i7 = i6 + n2;
i8 = i7 + n2;
r1 = pSrc[2 * i1] + pSrc[2 * i5];
r5 = pSrc[2 * i1] - pSrc[2 * i5];
r2 = pSrc[2 * i2] + pSrc[2 * i6];
r6 = pSrc[2 * i2] - pSrc[2 * i6];
r3 = pSrc[2 * i3] + pSrc[2 * i7];
r7 = pSrc[2 * i3] - pSrc[2 * i7];
r4 = pSrc[2 * i4] + pSrc[2 * i8];
r8 = pSrc[2 * i4] - pSrc[2 * i8];
t1 = r1 - r3;
r1 = r1 + r3;
r3 = r2 - r4;
r2 = r2 + r4;
pSrc[2 * i1] = r1 + r2;
pSrc[2 * i5] = r1 - r2;
r1 = pSrc[2 * i1 + 1] + pSrc[2 * i5 + 1];
s5 = pSrc[2 * i1 + 1] - pSrc[2 * i5 + 1];
r2 = pSrc[2 * i2 + 1] + pSrc[2 * i6 + 1];
s6 = pSrc[2 * i2 + 1] - pSrc[2 * i6 + 1];
s3 = pSrc[2 * i3 + 1] + pSrc[2 * i7 + 1];
s7 = pSrc[2 * i3 + 1] - pSrc[2 * i7 + 1];
r4 = pSrc[2 * i4 + 1] + pSrc[2 * i8 + 1];
s8 = pSrc[2 * i4 + 1] - pSrc[2 * i8 + 1];
t2 = r1 - s3;
r1 = r1 + s3;
s3 = r2 - r4;
r2 = r2 + r4;
pSrc[2 * i1 + 1] = r1 + r2;
pSrc[2 * i5 + 1] = r1 - r2;
pSrc[2 * i3] = t1 + s3;
pSrc[2 * i7] = t1 - s3;
pSrc[2 * i3 + 1] = t2 - r3;
pSrc[2 * i7 + 1] = t2 + r3;
r1 = (r6 - r8) * C81;
r6 = (r6 + r8) * C81;
r2 = (s6 - s8) * C81;
s6 = (s6 + s8) * C81;
t1 = r5 - r1;
r5 = r5 + r1;
r8 = r7 - r6;
r7 = r7 + r6;
t2 = s5 - r2;
s5 = s5 + r2;
s8 = s7 - s6;
s7 = s7 + s6;
pSrc[2 * i2] = r5 + s7;
pSrc[2 * i8] = r5 - s7;
pSrc[2 * i6] = t1 + s8;
pSrc[2 * i4] = t1 - s8;
pSrc[2 * i2 + 1] = s5 - r7;
pSrc[2 * i8 + 1] = s5 + r7;
pSrc[2 * i6 + 1] = t2 - r8;
pSrc[2 * i4 + 1] = t2 + r8;
i1 += n1;
} while(i1 < fftLen);
if(n2 < 8)
break;
ia1 = 0;
j = 1;
do
{
/* index calculation for the coefficients */
id = ia1 + twidCoefModifier;
ia1 = id;
ia2 = ia1 + id;
ia3 = ia2 + id;
ia4 = ia3 + id;
ia5 = ia4 + id;
ia6 = ia5 + id;
ia7 = ia6 + id;
co2 = pCoef[2 * ia1];
co3 = pCoef[2 * ia2];
co4 = pCoef[2 * ia3];
co5 = pCoef[2 * ia4];
co6 = pCoef[2 * ia5];
co7 = pCoef[2 * ia6];
co8 = pCoef[2 * ia7];
si2 = pCoef[2 * ia1 + 1];
si3 = pCoef[2 * ia2 + 1];
si4 = pCoef[2 * ia3 + 1];
si5 = pCoef[2 * ia4 + 1];
si6 = pCoef[2 * ia5 + 1];
si7 = pCoef[2 * ia6 + 1];
si8 = pCoef[2 * ia7 + 1];
i1 = j;
do
{
/* index calculation for the input */
i2 = i1 + n2;
i3 = i2 + n2;
i4 = i3 + n2;
i5 = i4 + n2;
i6 = i5 + n2;
i7 = i6 + n2;
i8 = i7 + n2;
r1 = pSrc[2 * i1] + pSrc[2 * i5];
r5 = pSrc[2 * i1] - pSrc[2 * i5];
r2 = pSrc[2 * i2] + pSrc[2 * i6];
r6 = pSrc[2 * i2] - pSrc[2 * i6];
r3 = pSrc[2 * i3] + pSrc[2 * i7];
r7 = pSrc[2 * i3] - pSrc[2 * i7];
r4 = pSrc[2 * i4] + pSrc[2 * i8];
r8 = pSrc[2 * i4] - pSrc[2 * i8];
t1 = r1 - r3;
r1 = r1 + r3;
r3 = r2 - r4;
r2 = r2 + r4;
pSrc[2 * i1] = r1 + r2;
r2 = r1 - r2;
s1 = pSrc[2 * i1 + 1] + pSrc[2 * i5 + 1];
s5 = pSrc[2 * i1 + 1] - pSrc[2 * i5 + 1];
s2 = pSrc[2 * i2 + 1] + pSrc[2 * i6 + 1];
s6 = pSrc[2 * i2 + 1] - pSrc[2 * i6 + 1];
s3 = pSrc[2 * i3 + 1] + pSrc[2 * i7 + 1];
s7 = pSrc[2 * i3 + 1] - pSrc[2 * i7 + 1];
s4 = pSrc[2 * i4 + 1] + pSrc[2 * i8 + 1];
s8 = pSrc[2 * i4 + 1] - pSrc[2 * i8 + 1];
t2 = s1 - s3;
s1 = s1 + s3;
s3 = s2 - s4;
s2 = s2 + s4;
r1 = t1 + s3;
t1 = t1 - s3;
pSrc[2 * i1 + 1] = s1 + s2;
s2 = s1 - s2;
s1 = t2 - r3;
t2 = t2 + r3;
p1 = co5 * r2;
p2 = si5 * s2;
p3 = co5 * s2;
p4 = si5 * r2;
pSrc[2 * i5] = p1 + p2;
pSrc[2 * i5 + 1] = p3 - p4;
p1 = co3 * r1;
p2 = si3 * s1;
p3 = co3 * s1;
p4 = si3 * r1;
pSrc[2 * i3] = p1 + p2;
pSrc[2 * i3 + 1] = p3 - p4;
p1 = co7 * t1;
p2 = si7 * t2;
p3 = co7 * t2;
p4 = si7 * t1;
pSrc[2 * i7] = p1 + p2;
pSrc[2 * i7 + 1] = p3 - p4;
r1 = (r6 - r8) * C81;
r6 = (r6 + r8) * C81;
s1 = (s6 - s8) * C81;
s6 = (s6 + s8) * C81;
t1 = r5 - r1;
r5 = r5 + r1;
r8 = r7 - r6;
r7 = r7 + r6;
t2 = s5 - s1;
s5 = s5 + s1;
s8 = s7 - s6;
s7 = s7 + s6;
r1 = r5 + s7;
r5 = r5 - s7;
r6 = t1 + s8;
t1 = t1 - s8;
s1 = s5 - r7;
s5 = s5 + r7;
s6 = t2 - r8;
t2 = t2 + r8;
p1 = co2 * r1;
p2 = si2 * s1;
p3 = co2 * s1;
p4 = si2 * r1;
pSrc[2 * i2] = p1 + p2;
pSrc[2 * i2 + 1] = p3 - p4;
p1 = co8 * r5;
p2 = si8 * s5;
p3 = co8 * s5;
p4 = si8 * r5;
pSrc[2 * i8] = p1 + p2;
pSrc[2 * i8 + 1] = p3 - p4;
p1 = co6 * r6;
p2 = si6 * s6;
p3 = co6 * s6;
p4 = si6 * r6;
pSrc[2 * i6] = p1 + p2;
pSrc[2 * i6 + 1] = p3 - p4;
p1 = co4 * t1;
p2 = si4 * t2;
p3 = co4 * t2;
p4 = si4 * t1;
pSrc[2 * i4] = p1 + p2;
pSrc[2 * i4 + 1] = p3 - p4;
i1 += n1;
} while(i1 < fftLen);
j++;
} while(j < n2);
twidCoefModifier <<= 3;
} while(n2 > 7);
}
/**
* @} end of Radix8_CFFT_CIFFT group
*/

View File

@ -0,0 +1,461 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2013 ARM Limited. All rights reserved.
*
* $Date: 17. January 2013
* $Revision: V1.4.1
*
* Project: CMSIS DSP Library
* Title: arm_dct4_f32.c
*
* Description: Processing function of DCT4 & IDCT4 F32.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @ingroup groupTransforms
*/
/**
* @defgroup DCT4_IDCT4 DCT Type IV Functions
* Representation of signals by minimum number of values is important for storage and transmission.
* The possibility of large discontinuity between the beginning and end of a period of a signal
* in DFT can be avoided by extending the signal so that it is even-symmetric.
* Discrete Cosine Transform (DCT) is constructed such that its energy is heavily concentrated in the lower part of the
* spectrum and is very widely used in signal and image coding applications.
* The family of DCTs (DCT type- 1,2,3,4) is the outcome of different combinations of homogeneous boundary conditions.
* DCT has an excellent energy-packing capability, hence has many applications and in data compression in particular.
*
* DCT is essentially the Discrete Fourier Transform(DFT) of an even-extended real signal.
* Reordering of the input data makes the computation of DCT just a problem of
* computing the DFT of a real signal with a few additional operations.
* This approach provides regular, simple, and very efficient DCT algorithms for practical hardware and software implementations.
*
* DCT type-II can be implemented using Fast fourier transform (FFT) internally, as the transform is applied on real values, Real FFT can be used.
* DCT4 is implemented using DCT2 as their implementations are similar except with some added pre-processing and post-processing.
* DCT2 implementation can be described in the following steps:
* - Re-ordering input
* - Calculating Real FFT
* - Multiplication of weights and Real FFT output and getting real part from the product.
*
* This process is explained by the block diagram below:
* \image html DCT4.gif "Discrete Cosine Transform - type-IV"
*
* \par Algorithm:
* The N-point type-IV DCT is defined as a real, linear transformation by the formula:
* \image html DCT4Equation.gif
* where <code>k = 0,1,2,.....N-1</code>
*\par
* Its inverse is defined as follows:
* \image html IDCT4Equation.gif
* where <code>n = 0,1,2,.....N-1</code>
*\par
* The DCT4 matrices become involutory (i.e. they are self-inverse) by multiplying with an overall scale factor of sqrt(2/N).
* The symmetry of the transform matrix indicates that the fast algorithms for the forward
* and inverse transform computation are identical.
* Note that the implementation of Inverse DCT4 and DCT4 is same, hence same process function can be used for both.
*
* \par Lengths supported by the transform:
* As DCT4 internally uses Real FFT, it supports all the lengths supported by arm_rfft_f32().
* The library provides separate functions for Q15, Q31, and floating-point data types.
* \par Instance Structure
* The instances for Real FFT and FFT, cosine values table and twiddle factor table are stored in an instance data structure.
* A separate instance structure must be defined for each transform.
* There are separate instance structure declarations for each of the 3 supported data types.
*
* \par Initialization Functions
* There is also an associated initialization function for each data type.
* The initialization function performs the following operations:
* - Sets the values of the internal structure fields.
* - Initializes Real FFT as its process function is used internally in DCT4, by calling arm_rfft_init_f32().
* \par
* Use of the initialization function is optional.
* However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
* To place an instance structure into a const data section, the instance structure must be manually initialized.
* Manually initialize the instance structure as follows:
* <pre>
*arm_dct4_instance_f32 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft};
*arm_dct4_instance_q31 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft};
*arm_dct4_instance_q15 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft};
* </pre>
* where \c N is the length of the DCT4; \c Nby2 is half of the length of the DCT4;
* \c normalize is normalizing factor used and is equal to <code>sqrt(2/N)</code>;
* \c pTwiddle points to the twiddle factor table;
* \c pCosFactor points to the cosFactor table;
* \c pRfft points to the real FFT instance;
* \c pCfft points to the complex FFT instance;
* The CFFT and RFFT structures also needs to be initialized, refer to arm_cfft_radix4_f32()
* and arm_rfft_f32() respectively for details regarding static initialization.
*
* \par Fixed-Point Behavior
* Care must be taken when using the fixed-point versions of the DCT4 transform functions.
* In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
* Refer to the function specific documentation below for usage guidelines.
*/
/**
* @addtogroup DCT4_IDCT4
* @{
*/
/**
* @brief Processing function for the floating-point DCT4/IDCT4.
* @param[in] *S points to an instance of the floating-point DCT4/IDCT4 structure.
* @param[in] *pState points to state buffer.
* @param[in,out] *pInlineBuffer points to the in-place input and output buffer.
* @return none.
*/
void arm_dct4_f32(
const arm_dct4_instance_f32 * S,
float32_t * pState,
float32_t * pInlineBuffer)
{
uint32_t i; /* Loop counter */
float32_t *weights = S->pTwiddle; /* Pointer to the Weights table */
float32_t *cosFact = S->pCosFactor; /* Pointer to the cos factors table */
float32_t *pS1, *pS2, *pbuff; /* Temporary pointers for input buffer and pState buffer */
float32_t in; /* Temporary variable */
/* DCT4 computation involves DCT2 (which is calculated using RFFT)
* along with some pre-processing and post-processing.
* Computational procedure is explained as follows:
* (a) Pre-processing involves multiplying input with cos factor,
* r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*n))
* where,
* r(n) -- output of preprocessing
* u(n) -- input to preprocessing(actual Source buffer)
* (b) Calculation of DCT2 using FFT is divided into three steps:
* Step1: Re-ordering of even and odd elements of input.
* Step2: Calculating FFT of the re-ordered input.
* Step3: Taking the real part of the product of FFT output and weights.
* (c) Post-processing - DCT4 can be obtained from DCT2 output using the following equation:
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
* where,
* Y4 -- DCT4 output, Y2 -- DCT2 output
* (d) Multiplying the output with the normalizing factor sqrt(2/N).
*/
/*-------- Pre-processing ------------*/
/* Multiplying input with cos factor i.e. r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*n)) */
arm_scale_f32(pInlineBuffer, 2.0f, pInlineBuffer, S->N);
arm_mult_f32(pInlineBuffer, cosFact, pInlineBuffer, S->N);
/* ----------------------------------------------------------------
* Step1: Re-ordering of even and odd elements as,
* pState[i] = pInlineBuffer[2*i] and
* pState[N-i-1] = pInlineBuffer[2*i+1] where i = 0 to N/2
---------------------------------------------------------------------*/
/* pS1 initialized to pState */
pS1 = pState;
/* pS2 initialized to pState+N-1, so that it points to the end of the state buffer */
pS2 = pState + (S->N - 1u);
/* pbuff initialized to input buffer */
pbuff = pInlineBuffer;
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
/* Initializing the loop counter to N/2 >> 2 for loop unrolling by 4 */
i = (uint32_t) S->Nby2 >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
do
{
/* Re-ordering of even and odd elements */
/* pState[i] = pInlineBuffer[2*i] */
*pS1++ = *pbuff++;
/* pState[N-i-1] = pInlineBuffer[2*i+1] */
*pS2-- = *pbuff++;
*pS1++ = *pbuff++;
*pS2-- = *pbuff++;
*pS1++ = *pbuff++;
*pS2-- = *pbuff++;
*pS1++ = *pbuff++;
*pS2-- = *pbuff++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/* pbuff initialized to input buffer */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Initializing the loop counter to N/4 instead of N for loop unrolling */
i = (uint32_t) S->N >> 2u;
/* Processing with loop unrolling 4 times as N is always multiple of 4.
* Compute 4 outputs at a time */
do
{
/* Writing the re-ordered output back to inplace input buffer */
*pbuff++ = *pS1++;
*pbuff++ = *pS1++;
*pbuff++ = *pS1++;
*pbuff++ = *pS1++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/* ---------------------------------------------------------
* Step2: Calculate RFFT for N-point input
* ---------------------------------------------------------- */
/* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
arm_rfft_f32(S->pRfft, pInlineBuffer, pState);
/*----------------------------------------------------------------------
* Step3: Multiply the FFT output with the weights.
*----------------------------------------------------------------------*/
arm_cmplx_mult_cmplx_f32(pState, weights, pState, S->N);
/* ----------- Post-processing ---------- */
/* DCT-IV can be obtained from DCT-II by the equation,
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
* Hence, Y4(0) = Y2(0)/2 */
/* Getting only real part from the output and Converting to DCT-IV */
/* Initializing the loop counter to N >> 2 for loop unrolling by 4 */
i = ((uint32_t) S->N - 1u) >> 2u;
/* pbuff initialized to input buffer. */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
in = *pS1++ * (float32_t) 0.5;
/* input buffer acts as inplace, so output values are stored in the input itself. */
*pbuff++ = in;
/* pState pointer is incremented twice as the real values are located alternatively in the array */
pS1++;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
do
{
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
in = *pS1++ - in;
*pbuff++ = in;
/* points to the next real value */
pS1++;
in = *pS1++ - in;
*pbuff++ = in;
pS1++;
in = *pS1++ - in;
*pbuff++ = in;
pS1++;
in = *pS1++ - in;
*pbuff++ = in;
pS1++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
i = ((uint32_t) S->N - 1u) % 0x4u;
while(i > 0u)
{
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
in = *pS1++ - in;
*pbuff++ = in;
/* points to the next real value */
pS1++;
/* Decrement the loop counter */
i--;
}
/*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
/* Initializing the loop counter to N/4 instead of N for loop unrolling */
i = (uint32_t) S->N >> 2u;
/* pbuff initialized to the pInlineBuffer(now contains the output values) */
pbuff = pInlineBuffer;
/* Processing with loop unrolling 4 times as N is always multiple of 4. Compute 4 outputs at a time */
do
{
/* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
in = *pbuff;
*pbuff++ = in * S->normalize;
in = *pbuff;
*pbuff++ = in * S->normalize;
in = *pbuff;
*pbuff++ = in * S->normalize;
in = *pbuff;
*pbuff++ = in * S->normalize;
/* Decrement the loop counter */
i--;
} while(i > 0u);
#else
/* Run the below code for Cortex-M0 */
/* Initializing the loop counter to N/2 */
i = (uint32_t) S->Nby2;
do
{
/* Re-ordering of even and odd elements */
/* pState[i] = pInlineBuffer[2*i] */
*pS1++ = *pbuff++;
/* pState[N-i-1] = pInlineBuffer[2*i+1] */
*pS2-- = *pbuff++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/* pbuff initialized to input buffer */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Initializing the loop counter */
i = (uint32_t) S->N;
do
{
/* Writing the re-ordered output back to inplace input buffer */
*pbuff++ = *pS1++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/* ---------------------------------------------------------
* Step2: Calculate RFFT for N-point input
* ---------------------------------------------------------- */
/* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
arm_rfft_f32(S->pRfft, pInlineBuffer, pState);
/*----------------------------------------------------------------------
* Step3: Multiply the FFT output with the weights.
*----------------------------------------------------------------------*/
arm_cmplx_mult_cmplx_f32(pState, weights, pState, S->N);
/* ----------- Post-processing ---------- */
/* DCT-IV can be obtained from DCT-II by the equation,
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
* Hence, Y4(0) = Y2(0)/2 */
/* Getting only real part from the output and Converting to DCT-IV */
/* pbuff initialized to input buffer. */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
in = *pS1++ * (float32_t) 0.5;
/* input buffer acts as inplace, so output values are stored in the input itself. */
*pbuff++ = in;
/* pState pointer is incremented twice as the real values are located alternatively in the array */
pS1++;
/* Initializing the loop counter */
i = ((uint32_t) S->N - 1u);
do
{
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
in = *pS1++ - in;
*pbuff++ = in;
/* points to the next real value */
pS1++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
/* Initializing the loop counter */
i = (uint32_t) S->N;
/* pbuff initialized to the pInlineBuffer(now contains the output values) */
pbuff = pInlineBuffer;
do
{
/* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
in = *pbuff;
*pbuff++ = in * S->normalize;
/* Decrement the loop counter */
i--;
} while(i > 0u);
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
}
/**
* @} end of DCT4_IDCT4 group
*/

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,394 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2013 ARM Limited. All rights reserved.
*
* $Date: 17. January 2013
* $Revision: V1.4.1
*
* Project: CMSIS DSP Library
* Title: arm_dct4_q15.c
*
* Description: Processing function of DCT4 & IDCT4 Q15.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @addtogroup DCT4_IDCT4
* @{
*/
/**
* @brief Processing function for the Q15 DCT4/IDCT4.
* @param[in] *S points to an instance of the Q15 DCT4 structure.
* @param[in] *pState points to state buffer.
* @param[in,out] *pInlineBuffer points to the in-place input and output buffer.
* @return none.
*
* \par Input an output formats:
* Internally inputs are downscaled in the RFFT process function to avoid overflows.
* Number of bits downscaled, depends on the size of the transform.
* The input and output formats for different DCT sizes and number of bits to upscale are mentioned in the table below:
*
* \image html dct4FormatsQ15Table.gif
*/
void arm_dct4_q15(
const arm_dct4_instance_q15 * S,
q15_t * pState,
q15_t * pInlineBuffer)
{
uint32_t i; /* Loop counter */
q15_t *weights = S->pTwiddle; /* Pointer to the Weights table */
q15_t *cosFact = S->pCosFactor; /* Pointer to the cos factors table */
q15_t *pS1, *pS2, *pbuff; /* Temporary pointers for input buffer and pState buffer */
q15_t in; /* Temporary variable */
/* DCT4 computation involves DCT2 (which is calculated using RFFT)
* along with some pre-processing and post-processing.
* Computational procedure is explained as follows:
* (a) Pre-processing involves multiplying input with cos factor,
* r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*n))
* where,
* r(n) -- output of preprocessing
* u(n) -- input to preprocessing(actual Source buffer)
* (b) Calculation of DCT2 using FFT is divided into three steps:
* Step1: Re-ordering of even and odd elements of input.
* Step2: Calculating FFT of the re-ordered input.
* Step3: Taking the real part of the product of FFT output and weights.
* (c) Post-processing - DCT4 can be obtained from DCT2 output using the following equation:
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
* where,
* Y4 -- DCT4 output, Y2 -- DCT2 output
* (d) Multiplying the output with the normalizing factor sqrt(2/N).
*/
/*-------- Pre-processing ------------*/
/* Multiplying input with cos factor i.e. r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*n)) */
arm_mult_q15(pInlineBuffer, cosFact, pInlineBuffer, S->N);
arm_shift_q15(pInlineBuffer, 1, pInlineBuffer, S->N);
/* ----------------------------------------------------------------
* Step1: Re-ordering of even and odd elements as
* pState[i] = pInlineBuffer[2*i] and
* pState[N-i-1] = pInlineBuffer[2*i+1] where i = 0 to N/2
---------------------------------------------------------------------*/
/* pS1 initialized to pState */
pS1 = pState;
/* pS2 initialized to pState+N-1, so that it points to the end of the state buffer */
pS2 = pState + (S->N - 1u);
/* pbuff initialized to input buffer */
pbuff = pInlineBuffer;
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
/* Initializing the loop counter to N/2 >> 2 for loop unrolling by 4 */
i = (uint32_t) S->Nby2 >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
do
{
/* Re-ordering of even and odd elements */
/* pState[i] = pInlineBuffer[2*i] */
*pS1++ = *pbuff++;
/* pState[N-i-1] = pInlineBuffer[2*i+1] */
*pS2-- = *pbuff++;
*pS1++ = *pbuff++;
*pS2-- = *pbuff++;
*pS1++ = *pbuff++;
*pS2-- = *pbuff++;
*pS1++ = *pbuff++;
*pS2-- = *pbuff++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/* pbuff initialized to input buffer */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Initializing the loop counter to N/4 instead of N for loop unrolling */
i = (uint32_t) S->N >> 2u;
/* Processing with loop unrolling 4 times as N is always multiple of 4.
* Compute 4 outputs at a time */
do
{
/* Writing the re-ordered output back to inplace input buffer */
*pbuff++ = *pS1++;
*pbuff++ = *pS1++;
*pbuff++ = *pS1++;
*pbuff++ = *pS1++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/* ---------------------------------------------------------
* Step2: Calculate RFFT for N-point input
* ---------------------------------------------------------- */
/* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
arm_rfft_q15(S->pRfft, pInlineBuffer, pState);
/*----------------------------------------------------------------------
* Step3: Multiply the FFT output with the weights.
*----------------------------------------------------------------------*/
arm_cmplx_mult_cmplx_q15(pState, weights, pState, S->N);
/* The output of complex multiplication is in 3.13 format.
* Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.15 format by shifting left by 2 bits. */
arm_shift_q15(pState, 2, pState, S->N * 2);
/* ----------- Post-processing ---------- */
/* DCT-IV can be obtained from DCT-II by the equation,
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
* Hence, Y4(0) = Y2(0)/2 */
/* Getting only real part from the output and Converting to DCT-IV */
/* Initializing the loop counter to N >> 2 for loop unrolling by 4 */
i = ((uint32_t) S->N - 1u) >> 2u;
/* pbuff initialized to input buffer. */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
in = *pS1++ >> 1u;
/* input buffer acts as inplace, so output values are stored in the input itself. */
*pbuff++ = in;
/* pState pointer is incremented twice as the real values are located alternatively in the array */
pS1++;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
do
{
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
in = *pS1++ - in;
*pbuff++ = in;
/* points to the next real value */
pS1++;
in = *pS1++ - in;
*pbuff++ = in;
pS1++;
in = *pS1++ - in;
*pbuff++ = in;
pS1++;
in = *pS1++ - in;
*pbuff++ = in;
pS1++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
i = ((uint32_t) S->N - 1u) % 0x4u;
while(i > 0u)
{
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
in = *pS1++ - in;
*pbuff++ = in;
/* points to the next real value */
pS1++;
/* Decrement the loop counter */
i--;
}
/*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
/* Initializing the loop counter to N/4 instead of N for loop unrolling */
i = (uint32_t) S->N >> 2u;
/* pbuff initialized to the pInlineBuffer(now contains the output values) */
pbuff = pInlineBuffer;
/* Processing with loop unrolling 4 times as N is always multiple of 4. Compute 4 outputs at a time */
do
{
/* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
in = *pbuff;
*pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
in = *pbuff;
*pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
in = *pbuff;
*pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
in = *pbuff;
*pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
/* Decrement the loop counter */
i--;
} while(i > 0u);
#else
/* Run the below code for Cortex-M0 */
/* Initializing the loop counter to N/2 */
i = (uint32_t) S->Nby2;
do
{
/* Re-ordering of even and odd elements */
/* pState[i] = pInlineBuffer[2*i] */
*pS1++ = *pbuff++;
/* pState[N-i-1] = pInlineBuffer[2*i+1] */
*pS2-- = *pbuff++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/* pbuff initialized to input buffer */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Initializing the loop counter */
i = (uint32_t) S->N;
do
{
/* Writing the re-ordered output back to inplace input buffer */
*pbuff++ = *pS1++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/* ---------------------------------------------------------
* Step2: Calculate RFFT for N-point input
* ---------------------------------------------------------- */
/* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
arm_rfft_q15(S->pRfft, pInlineBuffer, pState);
/*----------------------------------------------------------------------
* Step3: Multiply the FFT output with the weights.
*----------------------------------------------------------------------*/
arm_cmplx_mult_cmplx_q15(pState, weights, pState, S->N);
/* The output of complex multiplication is in 3.13 format.
* Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.15 format by shifting left by 2 bits. */
arm_shift_q15(pState, 2, pState, S->N * 2);
/* ----------- Post-processing ---------- */
/* DCT-IV can be obtained from DCT-II by the equation,
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
* Hence, Y4(0) = Y2(0)/2 */
/* Getting only real part from the output and Converting to DCT-IV */
/* Initializing the loop counter */
i = ((uint32_t) S->N - 1u);
/* pbuff initialized to input buffer. */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
in = *pS1++ >> 1u;
/* input buffer acts as inplace, so output values are stored in the input itself. */
*pbuff++ = in;
/* pState pointer is incremented twice as the real values are located alternatively in the array */
pS1++;
do
{
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
in = *pS1++ - in;
*pbuff++ = in;
/* points to the next real value */
pS1++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
/* Initializing the loop counter */
i = (uint32_t) S->N;
/* pbuff initialized to the pInlineBuffer(now contains the output values) */
pbuff = pInlineBuffer;
do
{
/* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
in = *pbuff;
*pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
/* Decrement the loop counter */
i--;
} while(i > 0u);
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
}
/**
* @} end of DCT4_IDCT4 group
*/

View File

@ -0,0 +1,395 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2013 ARM Limited. All rights reserved.
*
* $Date: 17. January 2013
* $Revision: V1.4.1
*
* Project: CMSIS DSP Library
* Title: arm_dct4_q31.c
*
* Description: Processing function of DCT4 & IDCT4 Q31.
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
/**
* @addtogroup DCT4_IDCT4
* @{
*/
/**
* @brief Processing function for the Q31 DCT4/IDCT4.
* @param[in] *S points to an instance of the Q31 DCT4 structure.
* @param[in] *pState points to state buffer.
* @param[in,out] *pInlineBuffer points to the in-place input and output buffer.
* @return none.
* \par Input an output formats:
* Input samples need to be downscaled by 1 bit to avoid saturations in the Q31 DCT process,
* as the conversion from DCT2 to DCT4 involves one subtraction.
* Internally inputs are downscaled in the RFFT process function to avoid overflows.
* Number of bits downscaled, depends on the size of the transform.
* The input and output formats for different DCT sizes and number of bits to upscale are mentioned in the table below:
*
* \image html dct4FormatsQ31Table.gif
*/
void arm_dct4_q31(
const arm_dct4_instance_q31 * S,
q31_t * pState,
q31_t * pInlineBuffer)
{
uint16_t i; /* Loop counter */
q31_t *weights = S->pTwiddle; /* Pointer to the Weights table */
q31_t *cosFact = S->pCosFactor; /* Pointer to the cos factors table */
q31_t *pS1, *pS2, *pbuff; /* Temporary pointers for input buffer and pState buffer */
q31_t in; /* Temporary variable */
/* DCT4 computation involves DCT2 (which is calculated using RFFT)
* along with some pre-processing and post-processing.
* Computational procedure is explained as follows:
* (a) Pre-processing involves multiplying input with cos factor,
* r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*n))
* where,
* r(n) -- output of preprocessing
* u(n) -- input to preprocessing(actual Source buffer)
* (b) Calculation of DCT2 using FFT is divided into three steps:
* Step1: Re-ordering of even and odd elements of input.
* Step2: Calculating FFT of the re-ordered input.
* Step3: Taking the real part of the product of FFT output and weights.
* (c) Post-processing - DCT4 can be obtained from DCT2 output using the following equation:
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
* where,
* Y4 -- DCT4 output, Y2 -- DCT2 output
* (d) Multiplying the output with the normalizing factor sqrt(2/N).
*/
/*-------- Pre-processing ------------*/
/* Multiplying input with cos factor i.e. r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*n)) */
arm_mult_q31(pInlineBuffer, cosFact, pInlineBuffer, S->N);
arm_shift_q31(pInlineBuffer, 1, pInlineBuffer, S->N);
/* ----------------------------------------------------------------
* Step1: Re-ordering of even and odd elements as
* pState[i] = pInlineBuffer[2*i] and
* pState[N-i-1] = pInlineBuffer[2*i+1] where i = 0 to N/2
---------------------------------------------------------------------*/
/* pS1 initialized to pState */
pS1 = pState;
/* pS2 initialized to pState+N-1, so that it points to the end of the state buffer */
pS2 = pState + (S->N - 1u);
/* pbuff initialized to input buffer */
pbuff = pInlineBuffer;
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
/* Initializing the loop counter to N/2 >> 2 for loop unrolling by 4 */
i = S->Nby2 >> 2u;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
do
{
/* Re-ordering of even and odd elements */
/* pState[i] = pInlineBuffer[2*i] */
*pS1++ = *pbuff++;
/* pState[N-i-1] = pInlineBuffer[2*i+1] */
*pS2-- = *pbuff++;
*pS1++ = *pbuff++;
*pS2-- = *pbuff++;
*pS1++ = *pbuff++;
*pS2-- = *pbuff++;
*pS1++ = *pbuff++;
*pS2-- = *pbuff++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/* pbuff initialized to input buffer */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Initializing the loop counter to N/4 instead of N for loop unrolling */
i = S->N >> 2u;
/* Processing with loop unrolling 4 times as N is always multiple of 4.
* Compute 4 outputs at a time */
do
{
/* Writing the re-ordered output back to inplace input buffer */
*pbuff++ = *pS1++;
*pbuff++ = *pS1++;
*pbuff++ = *pS1++;
*pbuff++ = *pS1++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/* ---------------------------------------------------------
* Step2: Calculate RFFT for N-point input
* ---------------------------------------------------------- */
/* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
arm_rfft_q31(S->pRfft, pInlineBuffer, pState);
/*----------------------------------------------------------------------
* Step3: Multiply the FFT output with the weights.
*----------------------------------------------------------------------*/
arm_cmplx_mult_cmplx_q31(pState, weights, pState, S->N);
/* The output of complex multiplication is in 3.29 format.
* Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.31 format by shifting left by 2 bits. */
arm_shift_q31(pState, 2, pState, S->N * 2);
/* ----------- Post-processing ---------- */
/* DCT-IV can be obtained from DCT-II by the equation,
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
* Hence, Y4(0) = Y2(0)/2 */
/* Getting only real part from the output and Converting to DCT-IV */
/* Initializing the loop counter to N >> 2 for loop unrolling by 4 */
i = (S->N - 1u) >> 2u;
/* pbuff initialized to input buffer. */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
in = *pS1++ >> 1u;
/* input buffer acts as inplace, so output values are stored in the input itself. */
*pbuff++ = in;
/* pState pointer is incremented twice as the real values are located alternatively in the array */
pS1++;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
do
{
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
in = *pS1++ - in;
*pbuff++ = in;
/* points to the next real value */
pS1++;
in = *pS1++ - in;
*pbuff++ = in;
pS1++;
in = *pS1++ - in;
*pbuff++ = in;
pS1++;
in = *pS1++ - in;
*pbuff++ = in;
pS1++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
i = (S->N - 1u) % 0x4u;
while(i > 0u)
{
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
in = *pS1++ - in;
*pbuff++ = in;
/* points to the next real value */
pS1++;
/* Decrement the loop counter */
i--;
}
/*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
/* Initializing the loop counter to N/4 instead of N for loop unrolling */
i = S->N >> 2u;
/* pbuff initialized to the pInlineBuffer(now contains the output values) */
pbuff = pInlineBuffer;
/* Processing with loop unrolling 4 times as N is always multiple of 4. Compute 4 outputs at a time */
do
{
/* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
in = *pbuff;
*pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));
in = *pbuff;
*pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));
in = *pbuff;
*pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));
in = *pbuff;
*pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));
/* Decrement the loop counter */
i--;
} while(i > 0u);
#else
/* Run the below code for Cortex-M0 */
/* Initializing the loop counter to N/2 */
i = S->Nby2;
do
{
/* Re-ordering of even and odd elements */
/* pState[i] = pInlineBuffer[2*i] */
*pS1++ = *pbuff++;
/* pState[N-i-1] = pInlineBuffer[2*i+1] */
*pS2-- = *pbuff++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/* pbuff initialized to input buffer */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Initializing the loop counter */
i = S->N;
do
{
/* Writing the re-ordered output back to inplace input buffer */
*pbuff++ = *pS1++;
/* Decrement the loop counter */
i--;
} while(i > 0u);
/* ---------------------------------------------------------
* Step2: Calculate RFFT for N-point input
* ---------------------------------------------------------- */
/* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
arm_rfft_q31(S->pRfft, pInlineBuffer, pState);
/*----------------------------------------------------------------------
* Step3: Multiply the FFT output with the weights.
*----------------------------------------------------------------------*/
arm_cmplx_mult_cmplx_q31(pState, weights, pState, S->N);
/* The output of complex multiplication is in 3.29 format.
* Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.31 format by shifting left by 2 bits. */
arm_shift_q31(pState, 2, pState, S->N * 2);
/* ----------- Post-processing ---------- */
/* DCT-IV can be obtained from DCT-II by the equation,
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
* Hence, Y4(0) = Y2(0)/2 */
/* Getting only real part from the output and Converting to DCT-IV */
/* pbuff initialized to input buffer. */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
in = *pS1++ >> 1u;
/* input buffer acts as inplace, so output values are stored in the input itself. */
*pbuff++ = in;
/* pState pointer is incremented twice as the real values are located alternatively in the array */
pS1++;
/* Initializing the loop counter */
i = (S->N - 1u);
while(i > 0u)
{
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
in = *pS1++ - in;
*pbuff++ = in;
/* points to the next real value */
pS1++;
/* Decrement the loop counter */
i--;
}
/*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
/* Initializing the loop counter */
i = S->N;
/* pbuff initialized to the pInlineBuffer(now contains the output values) */
pbuff = pInlineBuffer;
do
{
/* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
in = *pbuff;
*pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));
/* Decrement the loop counter */
i--;
} while(i > 0u);
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
}
/**
* @} end of DCT4_IDCT4 group
*/

View File

@ -0,0 +1,329 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2013 ARM Limited. All rights reserved.
*
* $Date: 17. January 2013
* $Revision: V1.4.1
*
* Project: CMSIS DSP Library
* Title: arm_rfft_f32.c
*
* Description: RFFT & RIFFT Floating point process function
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
extern void arm_radix4_butterfly_f32(
float32_t * pSrc,
uint16_t fftLen,
float32_t * pCoef,
uint16_t twidCoefModifier);
extern void arm_radix4_butterfly_inverse_f32(
float32_t * pSrc,
uint16_t fftLen,
float32_t * pCoef,
uint16_t twidCoefModifier,
float32_t onebyfftLen);
extern void arm_bitreversal_f32(
float32_t * pSrc,
uint16_t fftSize,
uint16_t bitRevFactor,
uint16_t * pBitRevTab);
/**
* @ingroup groupTransforms
*/
/*--------------------------------------------------------------------
* Internal functions prototypes
*--------------------------------------------------------------------*/
void arm_split_rfft_f32(
float32_t * pSrc,
uint32_t fftLen,
float32_t * pATable,
float32_t * pBTable,
float32_t * pDst,
uint32_t modifier);
void arm_split_rifft_f32(
float32_t * pSrc,
uint32_t fftLen,
float32_t * pATable,
float32_t * pBTable,
float32_t * pDst,
uint32_t modifier);
/**
* @addtogroup RealFFT
* @{
*/
/**
* @brief Processing function for the floating-point RFFT/RIFFT.
* @deprecated Do not use this function. It has been superceded by \ref arm_rfft_fast_f32 and will be removed
* in the future.
* @param[in] *S points to an instance of the floating-point RFFT/RIFFT structure.
* @param[in] *pSrc points to the input buffer.
* @param[out] *pDst points to the output buffer.
* @return none.
*/
void arm_rfft_f32(
const arm_rfft_instance_f32 * S,
float32_t * pSrc,
float32_t * pDst)
{
const arm_cfft_radix4_instance_f32 *S_CFFT = S->pCfft;
/* Calculation of Real IFFT of input */
if(S->ifftFlagR == 1u)
{
/* Real IFFT core process */
arm_split_rifft_f32(pSrc, S->fftLenBy2, S->pTwiddleAReal,
S->pTwiddleBReal, pDst, S->twidCoefRModifier);
/* Complex radix-4 IFFT process */
arm_radix4_butterfly_inverse_f32(pDst, S_CFFT->fftLen,
S_CFFT->pTwiddle,
S_CFFT->twidCoefModifier,
S_CFFT->onebyfftLen);
/* Bit reversal process */
if(S->bitReverseFlagR == 1u)
{
arm_bitreversal_f32(pDst, S_CFFT->fftLen,
S_CFFT->bitRevFactor, S_CFFT->pBitRevTable);
}
}
else
{
/* Calculation of RFFT of input */
/* Complex radix-4 FFT process */
arm_radix4_butterfly_f32(pSrc, S_CFFT->fftLen,
S_CFFT->pTwiddle, S_CFFT->twidCoefModifier);
/* Bit reversal process */
if(S->bitReverseFlagR == 1u)
{
arm_bitreversal_f32(pSrc, S_CFFT->fftLen,
S_CFFT->bitRevFactor, S_CFFT->pBitRevTable);
}
/* Real FFT core process */
arm_split_rfft_f32(pSrc, S->fftLenBy2, S->pTwiddleAReal,
S->pTwiddleBReal, pDst, S->twidCoefRModifier);
}
}
/**
* @} end of RealFFT group
*/
/**
* @brief Core Real FFT process
* @param[in] *pSrc points to the input buffer.
* @param[in] fftLen length of FFT.
* @param[in] *pATable points to the twiddle Coef A buffer.
* @param[in] *pBTable points to the twiddle Coef B buffer.
* @param[out] *pDst points to the output buffer.
* @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
* @return none.
*/
void arm_split_rfft_f32(
float32_t * pSrc,
uint32_t fftLen,
float32_t * pATable,
float32_t * pBTable,
float32_t * pDst,
uint32_t modifier)
{
uint32_t i; /* Loop Counter */
float32_t outR, outI; /* Temporary variables for output */
float32_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
float32_t CoefA1, CoefA2, CoefB1; /* Temporary variables for twiddle coefficients */
float32_t *pDst1 = &pDst[2], *pDst2 = &pDst[(4u * fftLen) - 1u]; /* temp pointers for output buffer */
float32_t *pSrc1 = &pSrc[2], *pSrc2 = &pSrc[(2u * fftLen) - 1u]; /* temp pointers for input buffer */
/* Init coefficient pointers */
pCoefA = &pATable[modifier * 2u];
pCoefB = &pBTable[modifier * 2u];
i = fftLen - 1u;
while(i > 0u)
{
/*
outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]
+ pSrc[2 * n - 2 * i] * pBTable[2 * i] +
pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
*/
/* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +
pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i]); */
/* read pATable[2 * i] */
CoefA1 = *pCoefA++;
/* pATable[2 * i + 1] */
CoefA2 = *pCoefA;
/* pSrc[2 * i] * pATable[2 * i] */
outR = *pSrc1 * CoefA1;
/* pSrc[2 * i] * CoefA2 */
outI = *pSrc1++ * CoefA2;
/* (pSrc[2 * i + 1] + pSrc[2 * fftLen - 2 * i + 1]) * CoefA2 */
outR -= (*pSrc1 + *pSrc2) * CoefA2;
/* pSrc[2 * i + 1] * CoefA1 */
outI += *pSrc1++ * CoefA1;
CoefB1 = *pCoefB;
/* pSrc[2 * fftLen - 2 * i + 1] * CoefB1 */
outI -= *pSrc2-- * CoefB1;
/* pSrc[2 * fftLen - 2 * i] * CoefA2 */
outI -= *pSrc2 * CoefA2;
/* pSrc[2 * fftLen - 2 * i] * CoefB1 */
outR += *pSrc2-- * CoefB1;
/* write output */
*pDst1++ = outR;
*pDst1++ = outI;
/* write complex conjugate output */
*pDst2-- = -outI;
*pDst2-- = outR;
/* update coefficient pointer */
pCoefB = pCoefB + (modifier * 2u);
pCoefA = pCoefA + ((modifier * 2u) - 1u);
i--;
}
pDst[2u * fftLen] = pSrc[0] - pSrc[1];
pDst[(2u * fftLen) + 1u] = 0.0f;
pDst[0] = pSrc[0] + pSrc[1];
pDst[1] = 0.0f;
}
/**
* @brief Core Real IFFT process
* @param[in] *pSrc points to the input buffer.
* @param[in] fftLen length of FFT.
* @param[in] *pATable points to the twiddle Coef A buffer.
* @param[in] *pBTable points to the twiddle Coef B buffer.
* @param[out] *pDst points to the output buffer.
* @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
* @return none.
*/
void arm_split_rifft_f32(
float32_t * pSrc,
uint32_t fftLen,
float32_t * pATable,
float32_t * pBTable,
float32_t * pDst,
uint32_t modifier)
{
float32_t outR, outI; /* Temporary variables for output */
float32_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
float32_t CoefA1, CoefA2, CoefB1; /* Temporary variables for twiddle coefficients */
float32_t *pSrc1 = &pSrc[0], *pSrc2 = &pSrc[(2u * fftLen) + 1u];
pCoefA = &pATable[0];
pCoefB = &pBTable[0];
while(fftLen > 0u)
{
/*
outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
pIn[2 * n - 2 * i] * pBTable[2 * i] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -
pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
*/
CoefA1 = *pCoefA++;
CoefA2 = *pCoefA;
/* outR = (pSrc[2 * i] * CoefA1 */
outR = *pSrc1 * CoefA1;
/* - pSrc[2 * i] * CoefA2 */
outI = -(*pSrc1++) * CoefA2;
/* (pSrc[2 * i + 1] + pSrc[2 * fftLen - 2 * i + 1]) * CoefA2 */
outR += (*pSrc1 + *pSrc2) * CoefA2;
/* pSrc[2 * i + 1] * CoefA1 */
outI += (*pSrc1++) * CoefA1;
CoefB1 = *pCoefB;
/* - pSrc[2 * fftLen - 2 * i + 1] * CoefB1 */
outI -= *pSrc2-- * CoefB1;
/* pSrc[2 * fftLen - 2 * i] * CoefB1 */
outR += *pSrc2 * CoefB1;
/* pSrc[2 * fftLen - 2 * i] * CoefA2 */
outI += *pSrc2-- * CoefA2;
/* write output */
*pDst++ = outR;
*pDst++ = outI;
/* update coefficient pointer */
pCoefB = pCoefB + (modifier * 2u);
pCoefA = pCoefA + ((modifier * 2u) - 1u);
/* Decrement loop count */
fftLen--;
}
}

View File

@ -0,0 +1,354 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2013 ARM Limited. All rights reserved.
*
* $Date: 17. January 2013
* $Revision: V1.4.1
*
* Project: CMSIS DSP Library
* Title: arm_rfft_f32.c
*
* Description: RFFT & RIFFT Floating point process function
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
void stage_rfft_f32(
arm_rfft_fast_instance_f32 * S,
float32_t * p, float32_t * pOut)
{
uint32_t k; /* Loop Counter */
float32_t twR, twI; /* RFFT Twiddle coefficients */
float32_t * pCoeff = S->pTwiddleRFFT; /* Points to RFFT Twiddle factors */
float32_t *pA = p; /* increasing pointer */
float32_t *pB = p; /* decreasing pointer */
float32_t xAR, xAI, xBR, xBI; /* temporary variables */
float32_t t1a, t1b; /* temporary variables */
float32_t p0, p1, p2, p3; /* temporary variables */
k = (S->Sint).fftLen - 1;
/* Pack first and last sample of the frequency domain together */
xBR = pB[0];
xBI = pB[1];
xAR = pA[0];
xAI = pA[1];
twR = *pCoeff++ ;
twI = *pCoeff++ ;
// U1 = XA(1) + XB(1); % It is real
t1a = xBR + xAR ;
// U2 = XB(1) - XA(1); % It is imaginary
t1b = xBI + xAI ;
// real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
// imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
*pOut++ = 0.5f * ( t1a + t1b );
*pOut++ = 0.5f * ( t1a - t1b );
// XA(1) = 1/2*( U1 - imag(U2) + i*( U1 +imag(U2) ));
pB = p + 2*k;
pA += 2;
do
{
/*
function X = my_split_rfft(X, ifftFlag)
% X is a series of real numbers
L = length(X);
XC = X(1:2:end) +i*X(2:2:end);
XA = fft(XC);
XB = conj(XA([1 end:-1:2]));
TW = i*exp(-2*pi*i*[0:L/2-1]/L).';
for l = 2:L/2
XA(l) = 1/2 * (XA(l) + XB(l) + TW(l) * (XB(l) - XA(l)));
end
XA(1) = 1/2* (XA(1) + XB(1) + TW(1) * (XB(1) - XA(1))) + i*( 1/2*( XA(1) + XB(1) + i*( XA(1) - XB(1))));
X = XA;
*/
xBI = pB[1];
xBR = pB[0];
xAR = pA[0];
xAI = pA[1];
twR = *pCoeff++;
twI = *pCoeff++;
t1a = xBR - xAR ;
t1b = xBI + xAI ;
// real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
// imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
p0 = twR * t1a;
p1 = twI * t1a;
p2 = twR * t1b;
p3 = twI * t1b;
*pOut++ = 0.5f * (xAR + xBR + p0 + p3 ); //xAR
*pOut++ = 0.5f * (xAI - xBI + p1 - p2 ); //xAI
pA += 2;
pB -= 2;
k--;
} while(k > 0u);
}
/* Prepares data for inverse cfft */
void merge_rfft_f32(
arm_rfft_fast_instance_f32 * S,
float32_t * p, float32_t * pOut)
{
uint32_t k; /* Loop Counter */
float32_t twR, twI; /* RFFT Twiddle coefficients */
float32_t *pCoeff = S->pTwiddleRFFT; /* Points to RFFT Twiddle factors */
float32_t *pA = p; /* increasing pointer */
float32_t *pB = p; /* decreasing pointer */
float32_t xAR, xAI, xBR, xBI; /* temporary variables */
float32_t t1a, t1b, r, s, t, u; /* temporary variables */
k = (S->Sint).fftLen - 1;
xAR = pA[0];
xAI = pA[1];
pCoeff += 2 ;
*pOut++ = 0.5f * ( xAR + xAI );
*pOut++ = 0.5f * ( xAR - xAI );
pB = p + 2*k ;
pA += 2 ;
while(k > 0u)
{
/* G is half of the frequency complex spectrum */
//for k = 2:N
// Xk(k) = 1/2 * (G(k) + conj(G(N-k+2)) + Tw(k)*( G(k) - conj(G(N-k+2))));
xBI = pB[1] ;
xBR = pB[0] ;
xAR = pA[0];
xAI = pA[1];
twR = *pCoeff++;
twI = *pCoeff++;
t1a = xAR - xBR ;
t1b = xAI + xBI ;
r = twR * t1a;
s = twI * t1b;
t = twI * t1a;
u = twR * t1b;
// real(tw * (xA - xB)) = twR * (xAR - xBR) - twI * (xAI - xBI);
// imag(tw * (xA - xB)) = twI * (xAR - xBR) + twR * (xAI - xBI);
*pOut++ = 0.5f * (xAR + xBR - r - s ); //xAR
*pOut++ = 0.5f * (xAI - xBI + t - u ); //xAI
pA += 2;
pB -= 2;
k--;
}
}
/**
* @ingroup groupTransforms
*/
/**
* @defgroup Fast Real FFT Functions
*
* \par
* The CMSIS DSP library includes specialized algorithms for computing the
* FFT of real data sequences. The FFT is defined over complex data but
* in many applications the input is real. Real FFT algorithms take advantage
* of the symmetry properties of the FFT and have a speed advantage over complex
* algorithms of the same length.
* \par
* The Fast RFFT algorith relays on the mixed radix CFFT that save processor usage.
* \par
* The real length N forward FFT of a sequence is computed using the steps shown below.
* \par
* \image html RFFT.gif "Real Fast Fourier Transform"
* \par
* The real sequence is initially treated as if it were complex to perform a CFFT.
* Later, a processing stage reshapes the data to obtain half of the frequency spectrum
* in complex format. Except the first complex number that contains the two real numbers
* X[0] and X[N/2] all the data is complex. In other words, the first complex sample
* contains two real values packed.
* \par
* The input for the inverse RFFT should keep the same format as the output of the
* forward RFFT. A first processing stage pre-process the data to later perform an
* inverse CFFT.
* \par
* \image html RIFFT.gif "Real Inverse Fast Fourier Transform"
* \par
* The algorithms for floating-point, Q15, and Q31 data are slightly different
* and we describe each algorithm in turn.
* \par Floating-point
* The main functions are <code>arm_rfft_fast_f32()</code>
* and <code>arm_rfft_fast_init_f32()</code>. The older functions
* <code>arm_rfft_f32()</code> and <code>arm_rfft_init_f32()</code> have been
* deprecated but are still documented.
* \par
* The FFT of a real N-point sequence has even symmetry in the frequency
* domain. The second half of the data equals the conjugate of the first half
* flipped in frequency:
* <pre>
*X[0] - real data
*X[1] - complex data
*X[2] - complex data
*...
*X[fftLen/2-1] - complex data
*X[fftLen/2] - real data
*X[fftLen/2+1] - conjugate of X[fftLen/2-1]
*X[fftLen/2+2] - conjugate of X[fftLen/2-2]
*...
*X[fftLen-1] - conjugate of X[1]
* </pre>
* Looking at the data, we see that we can uniquely represent the FFT using only
* <pre>
*N/2+1 samples:
*X[0] - real data
*X[1] - complex data
*X[2] - complex data
*...
*X[fftLen/2-1] - complex data
*X[fftLen/2] - real data
* </pre>
* Looking more closely we see that the first and last samples are real valued.
* They can be packed together and we can thus represent the FFT of an N-point
* real sequence by N/2 complex values:
* <pre>
*X[0],X[N/2] - packed real data: X[0] + jX[N/2]
*X[1] - complex data
*X[2] - complex data
*...
*X[fftLen/2-1] - complex data
* </pre>
* The real FFT functions pack the frequency domain data in this fashion. The
* forward transform outputs the data in this form and the inverse transform
* expects input data in this form. The function always performs the needed
* bitreversal so that the input and output data is always in normal order. The
* functions support lengths of [32, 64, 128, ..., 4096] samples.
* \par
* The forward and inverse real FFT functions apply the standard FFT scaling; no
* scaling on the forward transform and 1/fftLen scaling on the inverse
* transform.
* \par Q15 and Q31
* The real algorithms are defined in a similar manner and utilize N/2 complex
* transforms behind the scenes. In the case of fixed-point data, a radix-4
* complex transform is performed and this limits the allows sequence lengths to
* 128, 512, and 2048 samples.
* \par
* TBD. We need to document input and output order of data.
* \par
* The complex transforms used internally include scaling to prevent fixed-point
* overflows. The overall scaling equals 1/(fftLen/2).
* \par
* A separate instance structure must be defined for each transform used but
* twiddle factor and bit reversal tables can be reused.
* \par
* There is also an associated initialization function for each data type.
* The initialization function performs the following operations:
* - Sets the values of the internal structure fields.
* - Initializes twiddle factor table and bit reversal table pointers.
* - Initializes the internal complex FFT data structure.
* \par
* Use of the initialization function is optional.
* However, if the initialization function is used, then the instance structure
* cannot be placed into a const data section. To place an instance structure
* into a const data section, the instance structure should be manually
* initialized as follows:
* <pre>
*arm_rfft_instance_q31 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};
*arm_rfft_instance_q15 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};
* </pre>
* where <code>fftLenReal</code> is the length of the real transform;
* <code>fftLenBy2</code> length of the internal complex transform.
* <code>ifftFlagR</code> Selects forward (=0) or inverse (=1) transform.
* <code>bitReverseFlagR</code> Selects bit reversed output (=0) or normal order
* output (=1).
* <code>twidCoefRModifier</code> stride modifier for the twiddle factor table.
* The value is based on the FFT length;
* <code>pTwiddleAReal</code>points to the A array of twiddle coefficients;
* <code>pTwiddleBReal</code>points to the B array of twiddle coefficients;
* <code>pCfft</code> points to the CFFT Instance structure. The CFFT structure
* must also be initialized. Refer to arm_cfft_radix4_f32() for details regarding
* static initialization of the complex FFT instance structure.
*/
/**
* @addtogroup RealFFT
* @{
*/
/**
* @brief Processing function for the floating-point real FFT.
* @param[in] *S points to an arm_rfft_fast_instance_f32 structure.
* @param[in] *p points to the input buffer.
* @param[in] *pOut points to an arm_rfft_fast_instance_f32 structure.
* @param[in] ifftFlag RFFT if flag is 0, RIFFT if flag is 1
* @return none.
*/
void arm_rfft_fast_f32(
arm_rfft_fast_instance_f32 * S,
float32_t * p, float32_t * pOut,
uint8_t ifftFlag)
{
arm_cfft_instance_f32 * Sint = &(S->Sint);
Sint->fftLen = S->fftLenRFFT / 2;
/* Calculation of Real FFT */
if(ifftFlag)
{
/* Real FFT comression */
merge_rfft_f32(S, p, pOut);
/* Complex radix-4 IFFT process */
arm_cfft_f32( Sint, pOut, ifftFlag, 1);
}
else
{
/* Calculation of RFFT of input */
arm_cfft_f32( Sint, p, ifftFlag, 1);
/* Real FFT extraction */
stage_rfft_f32(S, p, pOut);
}
}

View File

@ -0,0 +1,139 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2013 ARM Limited. All rights reserved.
*
* $Date: 17. January 2013
* $Revision: V1.4.1
*
* Project: CMSIS DSP Library
* Title: arm_cfft_init_f32.c
*
* Description: Split Radix Decimation in Frequency CFFT Floating point processing function
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
#include "arm_common_tables.h"
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup RealFFT
* @{
*/
/**
* @brief Initialization function for the floating-point real FFT.
* @param[in,out] *S points to an arm_rfft_fast_instance_f32 structure.
* @param[in] fftLen length of the Real Sequence.
* @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
*
* \par Description:
* \par
* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
* Set(=1) ifftFlag for calculation of CIFFT otherwise RFFT is calculated
* \par
* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
* \par
* The parameter <code>fftLen</code> Specifies length of RFFT/CIFFT process. Supported FFT Lengths are 16, 32, 64, 128, 256, 512, 1024, 2048, 4096.
* \par
* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
*/
arm_status arm_rfft_fast_init_f32(
arm_rfft_fast_instance_f32 * S,
uint16_t fftLen)
{
arm_cfft_instance_f32 * Sint;
/* Initialise the default arm status */
arm_status status = ARM_MATH_SUCCESS;
/* Initialise the FFT length */
Sint = &(S->Sint);
Sint->fftLen = fftLen/2;
S->fftLenRFFT = fftLen;
/* Initialise the Twiddle coefficient pointer */
// S->pTwiddle = (float32_t *) twiddleCoef;
/* Initializations of structure parameters depending on the FFT length */
switch (Sint->fftLen)
{
case 4096u:
/* Initializations of structure parameters for 4096 point FFT */
/* Initialise the bit reversal table length */
Sint->bitRevLength = ARMBITREVINDEXTABLE4096_TABLE_LENGTH;
/* Initialise the bit reversal table pointer */
Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable4096;
/* Initialise the 1/fftLen Value */
break;
case 2048u:
Sint->bitRevLength = ARMBITREVINDEXTABLE2048_TABLE_LENGTH;
Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable2048;
break;
case 1024u:
Sint->bitRevLength = ARMBITREVINDEXTABLE1024_TABLE_LENGTH;
Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable1024;
break;
case 512u:
Sint->bitRevLength = ARMBITREVINDEXTABLE_512_TABLE_LENGTH;
Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable512;
break;
case 256u:
Sint->bitRevLength = ARMBITREVINDEXTABLE_256_TABLE_LENGTH;
Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable256;
break;
case 128u:
Sint->bitRevLength = ARMBITREVINDEXTABLE_128_TABLE_LENGTH;
Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable128;
break;
case 64u:
Sint->bitRevLength = ARMBITREVINDEXTABLE__64_TABLE_LENGTH;
Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable64;
break;
case 32u:
Sint->bitRevLength = ARMBITREVINDEXTABLE__32_TABLE_LENGTH;
Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable32;
break;
case 16u:
Sint->bitRevLength = ARMBITREVINDEXTABLE__16_TABLE_LENGTH;
Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable16;
break;
default:
/* Reporting argument error if fftSize is not valid value */
status = ARM_MATH_ARGUMENT_ERROR;
break;
}
return (status);
}
/**
* @} end of RealFFT group
*/

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,482 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2013 ARM Limited. All rights reserved.
*
* $Date: 17. January 2013
* $Revision: V1.4.1
*
* Project: CMSIS DSP Library
* Title: arm_rfft_q15.c
*
* Description: RFFT & RIFFT Q15 process function
*
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
void arm_radix4_butterfly_q15(
q15_t * pSrc16,
uint32_t fftLen,
q15_t * pCoef16,
uint32_t twidCoefModifier);
void arm_radix4_butterfly_inverse_q15(
q15_t * pSrc16,
uint32_t fftLen,
q15_t * pCoef16,
uint32_t twidCoefModifier);
void arm_bitreversal_q15(
q15_t * pSrc,
uint32_t fftLen,
uint16_t bitRevFactor,
uint16_t * pBitRevTab);
/*--------------------------------------------------------------------
* Internal functions prototypes
--------------------------------------------------------------------*/
void arm_split_rfft_q15(
q15_t * pSrc,
uint32_t fftLen,
q15_t * pATable,
q15_t * pBTable,
q15_t * pDst,
uint32_t modifier);
void arm_split_rifft_q15(
q15_t * pSrc,
uint32_t fftLen,
q15_t * pATable,
q15_t * pBTable,
q15_t * pDst,
uint32_t modifier);
/**
* @addtogroup RealFFT
* @{
*/
/**
* @brief Processing function for the Q15 RFFT/RIFFT.
* @param[in] *S points to an instance of the Q15 RFFT/RIFFT structure.
* @param[in] *pSrc points to the input buffer.
* @param[out] *pDst points to the output buffer.
* @return none.
*
* \par Input an output formats:
* \par
* Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
* Hence the output format is different for different RFFT sizes.
* The input and output formats for different RFFT sizes and number of bits to upscale are mentioned in the tables below for RFFT and RIFFT:
* \par
* \image html RFFTQ15.gif "Input and Output Formats for Q15 RFFT"
* \par
* \image html RIFFTQ15.gif "Input and Output Formats for Q15 RIFFT"
*/
void arm_rfft_q15(
const arm_rfft_instance_q15 * S,
q15_t * pSrc,
q15_t * pDst)
{
const arm_cfft_radix4_instance_q15 *S_CFFT = S->pCfft;
/* Calculation of RIFFT of input */
if(S->ifftFlagR == 1u)
{
/* Real IFFT core process */
arm_split_rifft_q15(pSrc, S->fftLenBy2, S->pTwiddleAReal,
S->pTwiddleBReal, pDst, S->twidCoefRModifier);
/* Complex readix-4 IFFT process */
arm_radix4_butterfly_inverse_q15(pDst, S_CFFT->fftLen,
S_CFFT->pTwiddle,
S_CFFT->twidCoefModifier);
/* Bit reversal process */
if(S->bitReverseFlagR == 1u)
{
arm_bitreversal_q15(pDst, S_CFFT->fftLen,
S_CFFT->bitRevFactor, S_CFFT->pBitRevTable);
}
}
else
{
/* Calculation of RFFT of input */
/* Complex readix-4 FFT process */
arm_radix4_butterfly_q15(pSrc, S_CFFT->fftLen,
S_CFFT->pTwiddle, S_CFFT->twidCoefModifier);
/* Bit reversal process */
if(S->bitReverseFlagR == 1u)
{
arm_bitreversal_q15(pSrc, S_CFFT->fftLen,
S_CFFT->bitRevFactor, S_CFFT->pBitRevTable);
}
arm_split_rfft_q15(pSrc, S->fftLenBy2, S->pTwiddleAReal,
S->pTwiddleBReal, pDst, S->twidCoefRModifier);
}
}
/**
* @} end of RealFFT group
*/
/**
* @brief Core Real FFT process
* @param *pSrc points to the input buffer.
* @param fftLen length of FFT.
* @param *pATable points to the A twiddle Coef buffer.
* @param *pBTable points to the B twiddle Coef buffer.
* @param *pDst points to the output buffer.
* @param modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
* @return none.
* The function implements a Real FFT
*/
void arm_split_rfft_q15(
q15_t * pSrc,
uint32_t fftLen,
q15_t * pATable,
q15_t * pBTable,
q15_t * pDst,
uint32_t modifier)
{
uint32_t i; /* Loop Counter */
q31_t outR, outI; /* Temporary variables for output */
q15_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
q15_t *pSrc1, *pSrc2;
// pSrc[2u * fftLen] = pSrc[0];
// pSrc[(2u * fftLen) + 1u] = pSrc[1];
pCoefA = &pATable[modifier * 2u];
pCoefB = &pBTable[modifier * 2u];
pSrc1 = &pSrc[2];
pSrc2 = &pSrc[(2u * fftLen) - 2u];
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
i = 1u;
while(i < fftLen)
{
/*
outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]
+ pSrc[2 * n - 2 * i] * pBTable[2 * i] +
pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
*/
/* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +
pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i]); */
#ifndef ARM_MATH_BIG_ENDIAN
/* pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1] */
outR = __SMUSD(*__SIMD32(pSrc1), *__SIMD32(pCoefA));
#else
/* -(pSrc[2 * i + 1] * pATable[2 * i + 1] - pSrc[2 * i] * pATable[2 * i]) */
outR = -(__SMUSD(*__SIMD32(pSrc1), *__SIMD32(pCoefA)));
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* pSrc[2 * n - 2 * i] * pBTable[2 * i] +
pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]) */
outR = __SMLAD(*__SIMD32(pSrc2), *__SIMD32(pCoefB), outR) >> 15u;
/* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
#ifndef ARM_MATH_BIG_ENDIAN
outI = __SMUSDX(*__SIMD32(pSrc2)--, *__SIMD32(pCoefB));
#else
outI = __SMUSDX(*__SIMD32(pCoefB), *__SIMD32(pSrc2)--);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] */
outI = __SMLADX(*__SIMD32(pSrc1)++, *__SIMD32(pCoefA), outI);
/* write output */
pDst[2u * i] = (q15_t) outR;
pDst[(2u * i) + 1u] = outI >> 15u;
/* write complex conjugate output */
pDst[(4u * fftLen) - (2u * i)] = (q15_t) outR;
pDst[((4u * fftLen) - (2u * i)) + 1u] = -(outI >> 15u);
/* update coefficient pointer */
pCoefB = pCoefB + (2u * modifier);
pCoefA = pCoefA + (2u * modifier);
i++;
}
pDst[2u * fftLen] = pSrc[0] - pSrc[1];
pDst[(2u * fftLen) + 1u] = 0;
pDst[0] = pSrc[0] + pSrc[1];
pDst[1] = 0;
#else
/* Run the below code for Cortex-M0 */
i = 1u;
while(i < fftLen)
{
/*
outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]
+ pSrc[2 * n - 2 * i] * pBTable[2 * i] +
pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
*/
outR = *pSrc1 * *pCoefA;
outR = outR - (*(pSrc1 + 1) * *(pCoefA + 1));
outR = outR + (*pSrc2 * *pCoefB);
outR = (outR + (*(pSrc2 + 1) * *(pCoefB + 1))) >> 15;
/* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +
pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
*/
outI = *pSrc2 * *(pCoefB + 1);
outI = outI - (*(pSrc2 + 1) * *pCoefB);
outI = outI + (*(pSrc1 + 1) * *pCoefA);
outI = outI + (*pSrc1 * *(pCoefA + 1));
/* update input pointers */
pSrc1 += 2u;
pSrc2 -= 2u;
/* write output */
pDst[2u * i] = (q15_t) outR;
pDst[(2u * i) + 1u] = outI >> 15u;
/* write complex conjugate output */
pDst[(4u * fftLen) - (2u * i)] = (q15_t) outR;
pDst[((4u * fftLen) - (2u * i)) + 1u] = -(outI >> 15u);
/* update coefficient pointer */
pCoefB = pCoefB + (2u * modifier);
pCoefA = pCoefA + (2u * modifier);
i++;
}
pDst[2u * fftLen] = pSrc[0] - pSrc[1];
pDst[(2u * fftLen) + 1u] = 0;
pDst[0] = pSrc[0] + pSrc[1];
pDst[1] = 0;
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
}
/**
* @brief Core Real IFFT process
* @param[in] *pSrc points to the input buffer.
* @param[in] fftLen length of FFT.
* @param[in] *pATable points to the twiddle Coef A buffer.
* @param[in] *pBTable points to the twiddle Coef B buffer.
* @param[out] *pDst points to the output buffer.
* @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
* @return none.
* The function implements a Real IFFT
*/
void arm_split_rifft_q15(
q15_t * pSrc,
uint32_t fftLen,
q15_t * pATable,
q15_t * pBTable,
q15_t * pDst,
uint32_t modifier)
{
uint32_t i; /* Loop Counter */
q31_t outR, outI; /* Temporary variables for output */
q15_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
q15_t *pSrc1, *pSrc2;
q15_t *pDst1 = &pDst[0];
pCoefA = &pATable[0];
pCoefB = &pBTable[0];
pSrc1 = &pSrc[0];
pSrc2 = &pSrc[2u * fftLen];
#ifndef ARM_MATH_CM0_FAMILY
/* Run the below code for Cortex-M4 and Cortex-M3 */
i = fftLen;
while(i > 0u)
{
/*
outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
pIn[2 * n - 2 * i] * pBTable[2 * i] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -
pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
*/
#ifndef ARM_MATH_BIG_ENDIAN
/* pIn[2 * n - 2 * i] * pBTable[2 * i] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]) */
outR = __SMUSD(*__SIMD32(pSrc2), *__SIMD32(pCoefB));
#else
/* -(-pIn[2 * n - 2 * i] * pBTable[2 * i] +
pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1])) */
outR = -(__SMUSD(*__SIMD32(pSrc2), *__SIMD32(pCoefB)));
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
pIn[2 * n - 2 * i] * pBTable[2 * i] */
outR = __SMLAD(*__SIMD32(pSrc1), *__SIMD32(pCoefA), outR) >> 15u;
/*
-pIn[2 * n - 2 * i] * pBTable[2 * i + 1] +
pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
outI = __SMUADX(*__SIMD32(pSrc2)--, *__SIMD32(pCoefB));
/* pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] */
#ifndef ARM_MATH_BIG_ENDIAN
outI = __SMLSDX(*__SIMD32(pCoefA), *__SIMD32(pSrc1)++, -outI);
#else
outI = __SMLSDX(*__SIMD32(pSrc1)++, *__SIMD32(pCoefA), -outI);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* write output */
#ifndef ARM_MATH_BIG_ENDIAN
*__SIMD32(pDst1)++ = __PKHBT(outR, (outI >> 15u), 16);
#else
*__SIMD32(pDst1)++ = __PKHBT((outI >> 15u), outR, 16);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* update coefficient pointer */
pCoefB = pCoefB + (2u * modifier);
pCoefA = pCoefA + (2u * modifier);
i--;
}
#else
/* Run the below code for Cortex-M0 */
i = fftLen;
while(i > 0u)
{
/*
outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
pIn[2 * n - 2 * i] * pBTable[2 * i] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
*/
outR = *pSrc2 * *pCoefB;
outR = outR - (*(pSrc2 + 1) * *(pCoefB + 1));
outR = outR + (*pSrc1 * *pCoefA);
outR = (outR + (*(pSrc1 + 1) * *(pCoefA + 1))) >> 15;
/*
outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -
pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
*/
outI = *(pSrc1 + 1) * *pCoefA;
outI = outI - (*pSrc1 * *(pCoefA + 1));
outI = outI - (*pSrc2 * *(pCoefB + 1));
outI = outI - (*(pSrc2 + 1) * *(pCoefB));
/* update input pointers */
pSrc1 += 2u;
pSrc2 -= 2u;
/* write output */
*pDst1++ = (q15_t) outR;
*pDst1++ = (q15_t) (outI >> 15);
/* update coefficient pointer */
pCoefB = pCoefB + (2u * modifier);
pCoefA = pCoefA + (2u * modifier);
i--;
}
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
}

View File

@ -0,0 +1,349 @@
/* ----------------------------------------------------------------------
* Copyright (C) 2010-2013 ARM Limited. All rights reserved.
*
* $Date: 17. January 2013
* $Revision: V1.4.1
*
* Project: CMSIS DSP Library
* Title: arm_rfft_q31.c
*
* Description: RFFT & RIFFT Q31 process function
*
*
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* - Neither the name of ARM LIMITED nor the names of its contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* -------------------------------------------------------------------- */
#include "arm_math.h"
void arm_radix4_butterfly_inverse_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pCoef,
uint32_t twidCoefModifier);
void arm_radix4_butterfly_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pCoef,
uint32_t twidCoefModifier);
void arm_bitreversal_q31(
q31_t * pSrc,
uint32_t fftLen,
uint16_t bitRevFactor,
uint16_t * pBitRevTab);
/*--------------------------------------------------------------------
* Internal functions prototypes
--------------------------------------------------------------------*/
void arm_split_rfft_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pATable,
q31_t * pBTable,
q31_t * pDst,
uint32_t modifier);
void arm_split_rifft_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pATable,
q31_t * pBTable,
q31_t * pDst,
uint32_t modifier);
/**
* @addtogroup RealFFT
* @{
*/
/**
* @brief Processing function for the Q31 RFFT/RIFFT.
* @param[in] *S points to an instance of the Q31 RFFT/RIFFT structure.
* @param[in] *pSrc points to the input buffer.
* @param[out] *pDst points to the output buffer.
* @return none.
*
* \par Input an output formats:
* \par
* Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
* Hence the output format is different for different RFFT sizes.
* The input and output formats for different RFFT sizes and number of bits to upscale are mentioned in the tables below for RFFT and RIFFT:
* \par
* \image html RFFTQ31.gif "Input and Output Formats for Q31 RFFT"
*
* \par
* \image html RIFFTQ31.gif "Input and Output Formats for Q31 RIFFT"
*/
void arm_rfft_q31(
const arm_rfft_instance_q31 * S,
q31_t * pSrc,
q31_t * pDst)
{
const arm_cfft_radix4_instance_q31 *S_CFFT = S->pCfft;
/* Calculation of RIFFT of input */
if(S->ifftFlagR == 1u)
{
/* Real IFFT core process */
arm_split_rifft_q31(pSrc, S->fftLenBy2, S->pTwiddleAReal,
S->pTwiddleBReal, pDst, S->twidCoefRModifier);
/* Complex readix-4 IFFT process */
arm_radix4_butterfly_inverse_q31(pDst, S_CFFT->fftLen,
S_CFFT->pTwiddle,
S_CFFT->twidCoefModifier);
/* Bit reversal process */
if(S->bitReverseFlagR == 1u)
{
arm_bitreversal_q31(pDst, S_CFFT->fftLen,
S_CFFT->bitRevFactor, S_CFFT->pBitRevTable);
}
}
else
{
/* Calculation of RFFT of input */
/* Complex readix-4 FFT process */
arm_radix4_butterfly_q31(pSrc, S_CFFT->fftLen,
S_CFFT->pTwiddle, S_CFFT->twidCoefModifier);
/* Bit reversal process */
if(S->bitReverseFlagR == 1u)
{
arm_bitreversal_q31(pSrc, S_CFFT->fftLen,
S_CFFT->bitRevFactor, S_CFFT->pBitRevTable);
}
/* Real FFT core process */
arm_split_rfft_q31(pSrc, S->fftLenBy2, S->pTwiddleAReal,
S->pTwiddleBReal, pDst, S->twidCoefRModifier);
}
}
/**
* @} end of RealFFT group
*/
/**
* @brief Core Real FFT process
* @param[in] *pSrc points to the input buffer.
* @param[in] fftLen length of FFT.
* @param[in] *pATable points to the twiddle Coef A buffer.
* @param[in] *pBTable points to the twiddle Coef B buffer.
* @param[out] *pDst points to the output buffer.
* @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
* @return none.
*/
void arm_split_rfft_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pATable,
q31_t * pBTable,
q31_t * pDst,
uint32_t modifier)
{
uint32_t i; /* Loop Counter */
q31_t outR, outI; /* Temporary variables for output */
q31_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
q31_t CoefA1, CoefA2, CoefB1; /* Temporary variables for twiddle coefficients */
q31_t *pOut1 = &pDst[2], *pOut2 = &pDst[(4u * fftLen) - 1u];
q31_t *pIn1 = &pSrc[2], *pIn2 = &pSrc[(2u * fftLen) - 1u];
/* Init coefficient pointers */
pCoefA = &pATable[modifier * 2u];
pCoefB = &pBTable[modifier * 2u];
i = fftLen - 1u;
while(i > 0u)
{
/*
outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]
+ pSrc[2 * n - 2 * i] * pBTable[2 * i] +
pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
*/
/* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +
pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i]); */
CoefA1 = *pCoefA++;
CoefA2 = *pCoefA;
/* outR = (pSrc[2 * i] * pATable[2 * i] */
outR = ((int32_t) (((q63_t) * pIn1 * CoefA1) >> 32));
/* outI = pIn[2 * i] * pATable[2 * i + 1] */
outI = ((int32_t) (((q63_t) * pIn1++ * CoefA2) >> 32));
/* - pSrc[2 * i + 1] * pATable[2 * i + 1] */
outR =
(q31_t) ((((q63_t) outR << 32) + ((q63_t) * pIn1 * (-CoefA2))) >> 32);
/* (pIn[2 * i + 1] * pATable[2 * i] */
outI =
(q31_t) ((((q63_t) outI << 32) + ((q63_t) * pIn1++ * (CoefA1))) >> 32);
/* pSrc[2 * n - 2 * i] * pBTable[2 * i] */
outR =
(q31_t) ((((q63_t) outR << 32) + ((q63_t) * pIn2 * (-CoefA2))) >> 32);
CoefB1 = *pCoefB;
/* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] */
outI =
(q31_t) ((((q63_t) outI << 32) + ((q63_t) * pIn2-- * (-CoefB1))) >> 32);
/* pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1] */
outR =
(q31_t) ((((q63_t) outR << 32) + ((q63_t) * pIn2 * (CoefB1))) >> 32);
/* pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
outI =
(q31_t) ((((q63_t) outI << 32) + ((q63_t) * pIn2-- * (-CoefA2))) >> 32);
/* write output */
*pOut1++ = (outR << 1u);
*pOut1++ = (outI << 1u);
/* write complex conjugate output */
*pOut2-- = -(outI << 1u);
*pOut2-- = (outR << 1u);
/* update coefficient pointer */
pCoefB = pCoefB + (modifier * 2u);
pCoefA = pCoefA + ((modifier * 2u) - 1u);
i--;
}
pDst[2u * fftLen] = pSrc[0] - pSrc[1];
pDst[(2u * fftLen) + 1u] = 0;
pDst[0] = pSrc[0] + pSrc[1];
pDst[1] = 0;
}
/**
* @brief Core Real IFFT process
* @param[in] *pSrc points to the input buffer.
* @param[in] fftLen length of FFT.
* @param[in] *pATable points to the twiddle Coef A buffer.
* @param[in] *pBTable points to the twiddle Coef B buffer.
* @param[out] *pDst points to the output buffer.
* @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
* @return none.
*/
void arm_split_rifft_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pATable,
q31_t * pBTable,
q31_t * pDst,
uint32_t modifier)
{
q31_t outR, outI; /* Temporary variables for output */
q31_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
q31_t CoefA1, CoefA2, CoefB1; /* Temporary variables for twiddle coefficients */
q31_t *pIn1 = &pSrc[0], *pIn2 = &pSrc[(2u * fftLen) + 1u];
pCoefA = &pATable[0];
pCoefB = &pBTable[0];
while(fftLen > 0u)
{
/*
outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
pIn[2 * n - 2 * i] * pBTable[2 * i] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -
pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
*/
CoefA1 = *pCoefA++;
CoefA2 = *pCoefA;
/* outR = (pIn[2 * i] * pATable[2 * i] */
outR = ((int32_t) (((q63_t) * pIn1 * CoefA1) >> 32));
/* - pIn[2 * i] * pATable[2 * i + 1] */
outI = -((int32_t) (((q63_t) * pIn1++ * CoefA2) >> 32));
/* pIn[2 * i + 1] * pATable[2 * i + 1] */
outR =
(q31_t) ((((q63_t) outR << 32) + ((q63_t) * pIn1 * (CoefA2))) >> 32);
/* pIn[2 * i + 1] * pATable[2 * i] */
outI =
(q31_t) ((((q63_t) outI << 32) + ((q63_t) * pIn1++ * (CoefA1))) >> 32);
/* pIn[2 * n - 2 * i] * pBTable[2 * i] */
outR =
(q31_t) ((((q63_t) outR << 32) + ((q63_t) * pIn2 * (CoefA2))) >> 32);
CoefB1 = *pCoefB;
/* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] */
outI =
(q31_t) ((((q63_t) outI << 32) - ((q63_t) * pIn2-- * (CoefB1))) >> 32);
/* pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1] */
outR =
(q31_t) ((((q63_t) outR << 32) + ((q63_t) * pIn2 * (CoefB1))) >> 32);
/* pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
outI =
(q31_t) ((((q63_t) outI << 32) + ((q63_t) * pIn2-- * (CoefA2))) >> 32);
/* write output */
*pDst++ = (outR << 1u);
*pDst++ = (outI << 1u);
/* update coefficient pointer */
pCoefB = pCoefB + (modifier * 2u);
pCoefA = pCoefA + ((modifier * 2u) - 1u);
/* Decrement loop count */
fftLen--;
}
}