Squashed 'tmk_core/' changes from 7967731..b9e0ea0

b9e0ea0 Merge commit '7fa9d8bdea3773d1195b04d98fcf27cf48ddd81d' as 'tool/mbed/mbed-sdk' 7fa9d8b Squashed 'tool/mbed/mbed-sdk/' content from commit 7c21ce5 git-subtree-dir: tmk_core git-subtree-split: b9e0ea08cb940de20b3610ecdda18e9d8cd7c552
2015-04-24 16:26:14 +09:00
parent a20ef7052c
commit 1fe4406f37
4198 changed files with 2016457 additions and 0 deletions
--- a/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_bitreversal.c
+++ b/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_bitreversal.c
@ -0,0 +1,242 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010-2013 ARM Limited. All rights reserved.    
+*    
+* $Date:        17. January 2013  
+* $Revision: 	V1.4.1  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:	    arm_bitreversal.c    
+*    
+* Description:	This file has common tables like Bitreverse, reciprocal etc which are used across different functions    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Redistribution and use in source and binary forms, with or without 
+* modification, are permitted provided that the following conditions
+* are met:
+*   - Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*   - Redistributions in binary form must reproduce the above copyright
+*     notice, this list of conditions and the following disclaimer in
+*     the documentation and/or other materials provided with the 
+*     distribution.
+*   - Neither the name of ARM LIMITED nor the names of its contributors
+*     may be used to endorse or promote products derived from this
+*     software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+* POSSIBILITY OF SUCH DAMAGE.  
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+#include "arm_common_tables.h"
+
+/*    
+* @brief  In-place bit reversal function.   
+* @param[in, out] *pSrc        points to the in-place buffer of floating-point data type.   
+* @param[in]      fftSize      length of the FFT.   
+* @param[in]      bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table.   
+* @param[in]      *pBitRevTab  points to the bit reversal table.   
+* @return none.   
+*/
+
+void arm_bitreversal_f32(
+float32_t * pSrc,
+uint16_t fftSize,
+uint16_t bitRevFactor,
+uint16_t * pBitRevTab)
+{
+   uint16_t fftLenBy2, fftLenBy2p1;
+   uint16_t i, j;
+   float32_t in;
+
+   /*  Initializations */
+   j = 0u;
+   fftLenBy2 = fftSize >> 1u;
+   fftLenBy2p1 = (fftSize >> 1u) + 1u;
+
+   /* Bit Reversal Implementation */
+   for (i = 0u; i <= (fftLenBy2 - 2u); i += 2u)
+   {
+      if(i < j)
+      {
+         /*  pSrc[i] <-> pSrc[j]; */
+         in = pSrc[2u * i];
+         pSrc[2u * i] = pSrc[2u * j];
+         pSrc[2u * j] = in;
+
+         /*  pSrc[i+1u] <-> pSrc[j+1u] */
+         in = pSrc[(2u * i) + 1u];
+         pSrc[(2u * i) + 1u] = pSrc[(2u * j) + 1u];
+         pSrc[(2u * j) + 1u] = in;
+
+         /*  pSrc[i+fftLenBy2p1] <-> pSrc[j+fftLenBy2p1] */
+         in = pSrc[2u * (i + fftLenBy2p1)];
+         pSrc[2u * (i + fftLenBy2p1)] = pSrc[2u * (j + fftLenBy2p1)];
+         pSrc[2u * (j + fftLenBy2p1)] = in;
+
+         /*  pSrc[i+fftLenBy2p1+1u] <-> pSrc[j+fftLenBy2p1+1u] */
+         in = pSrc[(2u * (i + fftLenBy2p1)) + 1u];
+         pSrc[(2u * (i + fftLenBy2p1)) + 1u] =
+         pSrc[(2u * (j + fftLenBy2p1)) + 1u];
+         pSrc[(2u * (j + fftLenBy2p1)) + 1u] = in;
+
+      }
+
+      /*  pSrc[i+1u] <-> pSrc[j+1u] */
+      in = pSrc[2u * (i + 1u)];
+      pSrc[2u * (i + 1u)] = pSrc[2u * (j + fftLenBy2)];
+      pSrc[2u * (j + fftLenBy2)] = in;
+
+      /*  pSrc[i+2u] <-> pSrc[j+2u] */
+      in = pSrc[(2u * (i + 1u)) + 1u];
+      pSrc[(2u * (i + 1u)) + 1u] = pSrc[(2u * (j + fftLenBy2)) + 1u];
+      pSrc[(2u * (j + fftLenBy2)) + 1u] = in;
+
+      /*  Reading the index for the bit reversal */
+      j = *pBitRevTab;
+
+      /*  Updating the bit reversal index depending on the fft length  */
+      pBitRevTab += bitRevFactor;
+   }
+}
+
+
+
+/*    
+* @brief  In-place bit reversal function.   
+* @param[in, out] *pSrc        points to the in-place buffer of Q31 data type.   
+* @param[in]      fftLen       length of the FFT.   
+* @param[in]      bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table   
+* @param[in]      *pBitRevTab  points to bit reversal table.   
+* @return none.   
+*/
+
+void arm_bitreversal_q31(
+q31_t * pSrc,
+uint32_t fftLen,
+uint16_t bitRevFactor,
+uint16_t * pBitRevTable)
+{
+   uint32_t fftLenBy2, fftLenBy2p1, i, j;
+   q31_t in;
+
+   /*  Initializations      */
+   j = 0u;
+   fftLenBy2 = fftLen / 2u;
+   fftLenBy2p1 = (fftLen / 2u) + 1u;
+
+   /* Bit Reversal Implementation */
+   for (i = 0u; i <= (fftLenBy2 - 2u); i += 2u)
+   {
+      if(i < j)
+      {
+         /*  pSrc[i] <-> pSrc[j]; */
+         in = pSrc[2u * i];
+         pSrc[2u * i] = pSrc[2u * j];
+         pSrc[2u * j] = in;
+
+         /*  pSrc[i+1u] <-> pSrc[j+1u] */
+         in = pSrc[(2u * i) + 1u];
+         pSrc[(2u * i) + 1u] = pSrc[(2u * j) + 1u];
+         pSrc[(2u * j) + 1u] = in;
+
+         /*  pSrc[i+fftLenBy2p1] <-> pSrc[j+fftLenBy2p1] */
+         in = pSrc[2u * (i + fftLenBy2p1)];
+         pSrc[2u * (i + fftLenBy2p1)] = pSrc[2u * (j + fftLenBy2p1)];
+         pSrc[2u * (j + fftLenBy2p1)] = in;
+
+         /*  pSrc[i+fftLenBy2p1+1u] <-> pSrc[j+fftLenBy2p1+1u] */
+         in = pSrc[(2u * (i + fftLenBy2p1)) + 1u];
+         pSrc[(2u * (i + fftLenBy2p1)) + 1u] =
+         pSrc[(2u * (j + fftLenBy2p1)) + 1u];
+         pSrc[(2u * (j + fftLenBy2p1)) + 1u] = in;
+
+      }
+
+      /*  pSrc[i+1u] <-> pSrc[j+1u] */
+      in = pSrc[2u * (i + 1u)];
+      pSrc[2u * (i + 1u)] = pSrc[2u * (j + fftLenBy2)];
+      pSrc[2u * (j + fftLenBy2)] = in;
+
+      /*  pSrc[i+2u] <-> pSrc[j+2u] */
+      in = pSrc[(2u * (i + 1u)) + 1u];
+      pSrc[(2u * (i + 1u)) + 1u] = pSrc[(2u * (j + fftLenBy2)) + 1u];
+      pSrc[(2u * (j + fftLenBy2)) + 1u] = in;
+
+      /*  Reading the index for the bit reversal */
+      j = *pBitRevTable;
+
+      /*  Updating the bit reversal index depending on the fft length */
+      pBitRevTable += bitRevFactor;
+   }
+}
+
+
+
+/*    
+   * @brief  In-place bit reversal function.   
+   * @param[in, out] *pSrc        points to the in-place buffer of Q15 data type.   
+   * @param[in]      fftLen       length of the FFT.   
+   * @param[in]      bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table   
+   * @param[in]      *pBitRevTab  points to bit reversal table.   
+   * @return none.   
+*/
+
+void arm_bitreversal_q15(
+q15_t * pSrc16,
+uint32_t fftLen,
+uint16_t bitRevFactor,
+uint16_t * pBitRevTab)
+{
+   q31_t *pSrc = (q31_t *) pSrc16;
+   q31_t in;
+   uint32_t fftLenBy2, fftLenBy2p1;
+   uint32_t i, j;
+
+   /*  Initializations */
+   j = 0u;
+   fftLenBy2 = fftLen / 2u;
+   fftLenBy2p1 = (fftLen / 2u) + 1u;
+
+   /* Bit Reversal Implementation */
+   for (i = 0u; i <= (fftLenBy2 - 2u); i += 2u)
+   {
+      if(i < j)
+      {
+         /*  pSrc[i] <-> pSrc[j]; */
+         /*  pSrc[i+1u] <-> pSrc[j+1u] */
+         in = pSrc[i];
+         pSrc[i] = pSrc[j];
+         pSrc[j] = in;
+
+         /*  pSrc[i + fftLenBy2p1] <-> pSrc[j + fftLenBy2p1];  */
+         /*  pSrc[i + fftLenBy2p1+1u] <-> pSrc[j + fftLenBy2p1+1u] */
+         in = pSrc[i + fftLenBy2p1];
+         pSrc[i + fftLenBy2p1] = pSrc[j + fftLenBy2p1];
+         pSrc[j + fftLenBy2p1] = in;
+      }
+
+      /*  pSrc[i+1u] <-> pSrc[j+fftLenBy2];         */
+      /*  pSrc[i+2] <-> pSrc[j+fftLenBy2+1u]  */
+      in = pSrc[i + 1u];
+      pSrc[i + 1u] = pSrc[j + fftLenBy2];
+      pSrc[j + fftLenBy2] = in;
+
+      /*  Reading the index for the bit reversal */
+      j = *pBitRevTab;
+
+      /*  Updating the bit reversal index depending on the fft length  */
+      pBitRevTab += bitRevFactor;
+   }
+}
--- a/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_bitreversal2.S
+++ b/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_bitreversal2.S
@ -0,0 +1,148 @@
+;/* ----------------------------------------------------------------------    
+;* Copyright (C) 2010-2013 ARM Limited. All rights reserved.    
+;*    
+;* $Date:        17. January 2013  
+;* $Revision: 	V1.4.1  
+;*    
+;* Project: 	    CMSIS DSP Library    
+;* Title:	    arm_bitreversal2.S   
+;*    
+;* Description:	This is the arm_bitreversal_32 function done in
+;*                assembly for maximum speed.  This function is called
+;*                after doing an fft to reorder the output.  The function
+;*                is loop unrolled by 2.
+;*    
+;* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+;*  
+;* Redistribution and use in source and binary forms, with or without 
+;* modification, are permitted provided that the following conditions
+;* are met:
+;*   - Redistributions of source code must retain the above copyright
+;*     notice, this list of conditions and the following disclaimer.
+;*   - Redistributions in binary form must reproduce the above copyright
+;*     notice, this list of conditions and the following disclaimer in
+;*     the documentation and/or other materials provided with the 
+;*     distribution.
+;*   - Neither the name of ARM LIMITED nor the names of its contributors
+;*     may be used to endorse or promote products derived from this
+;*     software without specific prior written permission.
+;*
+;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+;* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+;* POSSIBILITY OF SUCH DAMAGE.   
+;* -------------------------------------------------------------------- */
+#if defined(__CC_ARM) //Keil
+	#define CODESECT AREA     ||.text||, CODE, READONLY, ALIGN=2
+	#define LABEL
+#elif defined(__IASMARM__) //IAR
+	#define CODESECT SECTION `.text`:CODE
+	#define PROC 
+	#define LABEL 
+	#define ENDP
+	#define EXPORT PUBLIC
+#elif defined (__GNUC__) //GCC
+	.syntax unified
+	.cpu cortex-m4
+	.fpu softvfp
+	#define THUMB .thumb
+	#define CODESECT .section text
+	#define EXPORT .global
+	#define PROC :
+	#define LABEL :
+	#define ENDP
+	#define END
+#endif
+
+	CODESECT
+	THUMB
+
+;/*    
+;* @brief  In-place bit reversal function.   
+;* @param[in, out] *pSrc        points to the in-place buffer of unknown 32-bit data type. 
+;* @param[in]      bitRevLen    bit reversal table length
+;* @param[in]      *pBitRevTab  points to bit reversal table.   
+;* @return none.   
+;*/
+	EXPORT arm_bitreversal_32
+   
+#if defined(ARM_MATH_CM0) || defined(ARM_MATH_CM0PLUS)
+
+arm_bitreversal_32 PROC
+	ADDS     r3,r1,#1
+	PUSH     {r4-r6}           
+	ADDS     r1,r2,#0          
+	LSRS     r3,r3,#1          
+arm_bitreversal_32_0 LABEL
+	LDRH     r2,[r1,#2]        
+	LDRH     r6,[r1,#0]    
+	ADD      r2,r0,r2   
+	ADD      r6,r0,r6   
+	LDR      r5,[r2,#0] 
+	LDR      r4,[r6,#0]
+	STR      r5,[r6,#0]
+	STR      r4,[r2,#0]    
+	LDR      r5,[r2,#4]        
+	LDR      r4,[r6,#4]       
+	STR      r5,[r6,#4]       
+	STR      r4,[r2,#4]        
+	ADDS     r1,r1,#4          
+	SUBS     r3,r3,#1          
+	BNE      arm_bitreversal_32_0
+	POP      {r4-r6}
+	BX       lr
+	ENDP
+
+#else
+
+arm_bitreversal_32 PROC
+	ADDS     r3,r1,#1
+	CMP      r3,#1
+	IT       LS
+	BXLS     lr
+	PUSH     {r4-r9}           
+	ADDS     r1,r2,#2          
+	LSRS     r3,r3,#2          
+arm_bitreversal_32_0 LABEL       ;/* loop unrolled by 2 */
+	LDRH     r8,[r1,#4]        
+	LDRH     r9,[r1,#2]  
+	LDRH     r2,[r1,#0]        
+	LDRH     r12,[r1,#-2]    
+	ADD      r8,r0,r8   
+	ADD      r9,r0,r9    
+	ADD      r2,r0,r2   
+	ADD      r12,r0,r12   
+	LDR      r7,[r9,#0] 
+	LDR      r6,[r8,#0]
+	LDR      r5,[r2,#0] 
+	LDR      r4,[r12,#0]
+	STR      r6,[r9,#0]
+	STR      r7,[r8,#0] 
+	STR      r5,[r12,#0]
+	STR      r4,[r2,#0]      
+	LDR      r7,[r9,#4]        
+	LDR      r6,[r8,#4] 
+	LDR      r5,[r2,#4]        
+	LDR      r4,[r12,#4]           
+	STR      r6,[r9,#4]       
+	STR      r7,[r8,#4]   
+	STR      r5,[r12,#4]       
+	STR      r4,[r2,#4]        
+	ADDS     r1,r1,#8          
+	SUBS     r3,r3,#1          
+	BNE      arm_bitreversal_32_0
+	POP      {r4-r9}
+	BX       lr
+	ENDP
+   
+#endif
+   
+	END
--- a/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_f32.c
+++ b/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_f32.c
@ -0,0 +1,616 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010-2013 ARM Limited. All rights reserved.    
+*    
+* $Date:        17. January 2013  
+* $Revision: 	V1.4.1  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:	    arm_cfft_f32.c   
+*    
+* Description:	Combined Radix Decimation in Frequency CFFT Floating point processing function
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Redistribution and use in source and binary forms, with or without 
+* modification, are permitted provided that the following conditions
+* are met:
+*   - Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*   - Redistributions in binary form must reproduce the above copyright
+*     notice, this list of conditions and the following disclaimer in
+*     the documentation and/or other materials provided with the 
+*     distribution.
+*   - Neither the name of ARM LIMITED nor the names of its contributors
+*     may be used to endorse or promote products derived from this
+*     software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+* POSSIBILITY OF SUCH DAMAGE.   
+* -------------------------------------------------------------------- */
+
+
+#include "arm_math.h"
+#include "arm_common_tables.h"
+
+extern void arm_radix8_butterfly_f32(
+  float32_t * pSrc,
+  uint16_t fftLen,
+  const float32_t * pCoef,
+  uint16_t twidCoefModifier);
+
+extern void arm_bitreversal_32(
+		uint32_t * pSrc,
+		const uint16_t bitRevLen,
+		const uint16_t * pBitRevTable);
+
+/**   
+* @ingroup groupTransforms   
+*/
+
+/**   
+* @defgroup ComplexFFT Complex FFT Functions   
+*   
+* \par
+* The Fast Fourier Transform (FFT) is an efficient algorithm for computing the
+* Discrete Fourier Transform (DFT).  The FFT can be orders of magnitude faster
+* than the DFT, especially for long lengths.
+* The algorithms described in this section
+* operate on complex data.  A separate set of functions is devoted to handling
+* of real sequences.
+* \par
+* There are separate algorithms for handling floating-point, Q15, and Q31 data
+* types.  The algorithms available for each data type are described next.
+* \par
+* The FFT functions operate in-place.  That is, the array holding the input data
+* will also be used to hold the corresponding result.  The input data is complex
+* and contains <code>2*fftLen</code> interleaved values as shown below.
+* <pre> {real[0], imag[0], real[1], imag[1],..} </pre>
+* The FFT result will be contained in the same array and the frequency domain
+* values will have the same interleaving.
+*
+* \par Floating-point
+* The floating-point complex FFT uses a mixed-radix algorithm.  Multiple radix-8
+* stages are performed along with a single radix-2 or radix-4 stage, as needed.
+* The algorithm supports lengths of [16, 32, 64, ..., 4096] and each length uses
+* a different twiddle factor table.  
+* \par
+* The function uses the standard FFT definition and output values may grow by a
+* factor of <code>fftLen</code> when computing the forward transform.  The
+* inverse transform includes a scale of <code>1/fftLen</code> as part of the
+* calculation and this matches the textbook definition of the inverse FFT.
+* \par
+* Preinitialized data structures containing twiddle factors and bit reversal
+* tables are provided and defined in <code>arm_const_structs.h</code>.  Include 
+* this header in your function and then pass one of the constant structures as 
+* an argument to arm_cfft_f32.  For example:
+* \par
+* <code>arm_cfft_f32(arm_cfft_sR_f32_len64, pSrc, 1, 1)</code>
+* \par
+* computes a 64-point inverse complex FFT including bit reversal.
+* The data structures are treated as constant data and not modified during the
+* calculation.  The same data structure can be reused for multiple transforms
+* including mixing forward and inverse transforms.
+* \par
+* Earlier releases of the library provided separate radix-2 and radix-4
+* algorithms that operated on floating-point data.  These functions are still
+* provided but are deprecated.  The older functions are slower and less general
+* than the new functions.
+* \par
+* An example of initialization of the constants for the arm_cfft_f32 function follows:
+* \par
+* const static arm_cfft_instance_f32 *S;
+* ...
+*		switch (length) {
+*    		case 16:
+*    			S = & arm_cfft_sR_f32_len16;
+*    		break;
+*    		case 32:
+*    			S = & arm_cfft_sR_f32_len32;
+*    		break;
+*			case 64:
+*    			S = & arm_cfft_sR_f32_len64;
+*    		break;
+*    		case 128:
+*    			S = & arm_cfft_sR_f32_len128;
+*    		break;
+*    		case 256:
+*    			S = & arm_cfft_sR_f32_len256;
+*    		break;
+*    		case 512:
+*    			S = & arm_cfft_sR_f32_len512;
+*    		break;
+*    		case 1024:
+*    			S = & arm_cfft_sR_f32_len1024;
+*    		break;
+*    		case 2048:
+*    			S = & arm_cfft_sR_f32_len2048;
+*    		break;
+*    		case 4096:
+*    			S = & arm_cfft_sR_f32_len4096;
+*    		break;
+*			}
+* \par Q15 and Q31
+* The library provides radix-2 and radix-4 FFT algorithms for fixed-point data.  The
+* radix-2 algorithm supports lengths of [16, 32, 64, ..., 4096].  The radix-4
+* algorithm supports lengths of [16, 64, 256, ..., 4096].  When possible, you
+* should use the radix-4 algorithm since it is faster than the radix-2 of the
+* same length.
+* \par
+* The forward FFTs include scaling in order to prevent results from overflowing.
+* Intermediate results are scaled down during each butterfly stage.  In the
+* radix-2 algorithm, a scale of 0.5 is applied during each butterfly.  In the
+* radix-4 algorithm, a scale of 0.25 is applied.  The scaling applies to both
+* the forward and the inverse FFTs.  Thus the forward FFT contains an additional
+* scale factor of <code>1/fftLen</code> as compared to the standard textbook
+* definition of the FFT.  The inverse FFT also scales down during each butterfly
+* stage and this corresponds to the standard textbook definition.
+* \par
+* A separate instance structure must be defined for each transform used but
+* twiddle factor and bit reversal tables can be reused.
+* \par 
+* There is also an associated initialization function for each data type.   
+* The initialization function performs the following operations:   
+* - Sets the values of the internal structure fields.   
+* - Initializes twiddle factor table and bit reversal table pointers.
+* \par   
+* Use of the initialization function is optional.   
+* However, if the initialization function is used, then the instance structure 
+* cannot be placed into a const data section. To place an instance structure 
+* into a const data section, the instance structure should be manually 
+* initialized as follows:
+* <pre>   
+*arm_cfft_radix2_instance_q31 S = {fftLen, ifftFlag, bitReverseFlag, pTwiddle, pBitRevTable, twidCoefModifier, bitRevFactor};   
+*arm_cfft_radix2_instance_q15 S = {fftLen, ifftFlag, bitReverseFlag, pTwiddle, pBitRevTable, twidCoefModifier, bitRevFactor};   
+*arm_cfft_radix4_instance_q31 S = {fftLen, ifftFlag, bitReverseFlag, pTwiddle, pBitRevTable, twidCoefModifier, bitRevFactor};    
+*arm_cfft_radix4_instance_q15 S = {fftLen, ifftFlag, bitReverseFlag, pTwiddle, pBitRevTable, twidCoefModifier, bitRevFactor};    
+*arm_cfft_instance_f32 S = {fftLen, pTwiddle, pBitRevTable, bitRevLength};
+* </pre>   
+* \par   
+* where <code>fftLen</code> length of CFFT/CIFFT; <code>ifftFlag</code> Flag for
+* selection of forward or inverse transform.  When ifftFlag is set the inverse
+* transform is calculated.
+* <code>bitReverseFlag</code> Flag for selection of output order (Set bitReverseFlag to output in normal order otherwise output in bit reversed order);    
+* <code>pTwiddle</code>points to array of twiddle coefficients; <code>pBitRevTable</code> points to the bit reversal table.   
+* <code>twidCoefModifier</code> modifier for twiddle factor table which supports all FFT lengths with same table;    
+* <code>pBitRevTable</code> modifier for bit reversal table which supports all FFT lengths with same table.   
+* <code>onebyfftLen</code> value of 1/fftLen to calculate CIFFT;
+* \par
+* The Q15 and Q31 FFT functions use a large bit reversal and twiddle factor
+* table.  The tables are defined for the maximum length transform and a subset
+* of the coefficients are used in shorter transforms.
+* 
+*/
+
+void arm_cfft_radix8by2_f32( arm_cfft_instance_f32 * S, float32_t * p1) 
+{
+   uint32_t    L  = S->fftLen;
+   float32_t * pCol1, * pCol2, * pMid1, * pMid2;
+   float32_t * p2 = p1 + L;
+   const float32_t * tw = (float32_t *) S->pTwiddle;
+   float32_t t1[4], t2[4], t3[4], t4[4], twR, twI;
+   float32_t m0, m1, m2, m3;
+   uint32_t l;
+
+   pCol1 = p1;
+   pCol2 = p2;
+
+   //    Define new length
+   L >>= 1;
+   //    Initialize mid pointers
+   pMid1 = p1 + L;
+   pMid2 = p2 + L;
+
+   // do two dot Fourier transform
+   for ( l = L >> 2; l > 0; l-- ) 
+   {
+      t1[0] = p1[0];
+      t1[1] = p1[1];
+      t1[2] = p1[2];
+      t1[3] = p1[3];
+
+      t2[0] = p2[0];
+      t2[1] = p2[1];
+      t2[2] = p2[2];
+      t2[3] = p2[3];
+
+      t3[0] = pMid1[0];
+      t3[1] = pMid1[1];
+      t3[2] = pMid1[2];
+      t3[3] = pMid1[3];
+
+      t4[0] = pMid2[0];
+      t4[1] = pMid2[1];
+      t4[2] = pMid2[2];
+      t4[3] = pMid2[3];
+
+      *p1++ = t1[0] + t2[0];
+      *p1++ = t1[1] + t2[1];
+      *p1++ = t1[2] + t2[2];
+      *p1++ = t1[3] + t2[3];    // col 1
+
+      t2[0] = t1[0] - t2[0];
+      t2[1] = t1[1] - t2[1];
+      t2[2] = t1[2] - t2[2];
+      t2[3] = t1[3] - t2[3];    // for col 2
+
+      *pMid1++ = t3[0] + t4[0];
+      *pMid1++ = t3[1] + t4[1];
+      *pMid1++ = t3[2] + t4[2];
+      *pMid1++ = t3[3] + t4[3]; // col 1
+
+      t4[0] = t4[0] - t3[0];
+      t4[1] = t4[1] - t3[1];
+      t4[2] = t4[2] - t3[2];
+      t4[3] = t4[3] - t3[3];    // for col 2
+
+      twR = *tw++;
+      twI = *tw++;
+
+      // multiply by twiddle factors
+      m0 = t2[0] * twR;
+      m1 = t2[1] * twI;
+      m2 = t2[1] * twR;
+      m3 = t2[0] * twI;
+      
+      // R  =  R  *  Tr - I * Ti
+      *p2++ = m0 + m1;
+      // I  =  I  *  Tr + R * Ti
+      *p2++ = m2 - m3;
+      
+      // use vertical symmetry
+	  //  0.9988 - 0.0491i <==> -0.0491 - 0.9988i
+      m0 = t4[0] * twI;
+      m1 = t4[1] * twR;
+      m2 = t4[1] * twI;
+      m3 = t4[0] * twR;
+      
+      *pMid2++ = m0 - m1;
+      *pMid2++ = m2 + m3;
+
+      twR = *tw++;
+      twI = *tw++;
+      
+      m0 = t2[2] * twR;
+      m1 = t2[3] * twI;
+      m2 = t2[3] * twR;
+      m3 = t2[2] * twI;
+      
+      *p2++ = m0 + m1;
+      *p2++ = m2 - m3;
+         
+      m0 = t4[2] * twI;
+      m1 = t4[3] * twR;
+      m2 = t4[3] * twI;
+      m3 = t4[2] * twR;
+      
+      *pMid2++ = m0 - m1;
+      *pMid2++ = m2 + m3;
+   }
+
+   // first col
+   arm_radix8_butterfly_f32( pCol1, L, (float32_t *) S->pTwiddle, 2u);
+   // second col
+   arm_radix8_butterfly_f32( pCol2, L, (float32_t *) S->pTwiddle, 2u);
+   
+}
+
+void arm_cfft_radix8by4_f32( arm_cfft_instance_f32 * S, float32_t * p1) 
+{
+   uint32_t    L  = S->fftLen >> 1;
+   float32_t * pCol1, *pCol2, *pCol3, *pCol4, *pEnd1, *pEnd2, *pEnd3, *pEnd4;
+	const float32_t *tw2, *tw3, *tw4;
+   float32_t * p2 = p1 + L;
+   float32_t * p3 = p2 + L;
+   float32_t * p4 = p3 + L;
+   float32_t t2[4], t3[4], t4[4], twR, twI;
+   float32_t p1ap3_0, p1sp3_0, p1ap3_1, p1sp3_1;
+   float32_t m0, m1, m2, m3;
+   uint32_t l, twMod2, twMod3, twMod4;
+
+   pCol1 = p1;         // points to real values by default
+   pCol2 = p2;
+   pCol3 = p3;
+   pCol4 = p4;
+   pEnd1 = p2 - 1;     // points to imaginary values by default
+   pEnd2 = p3 - 1;
+   pEnd3 = p4 - 1;
+   pEnd4 = pEnd3 + L;
+   
+   tw2 = tw3 = tw4 = (float32_t *) S->pTwiddle;
+   
+   L >>= 1;
+
+   // do four dot Fourier transform
+
+   twMod2 = 2;
+   twMod3 = 4;
+   twMod4 = 6;
+
+   // TOP
+   p1ap3_0 = p1[0] + p3[0];
+   p1sp3_0 = p1[0] - p3[0];
+   p1ap3_1 = p1[1] + p3[1];
+   p1sp3_1 = p1[1] - p3[1];
+
+   // col 2
+   t2[0] = p1sp3_0 + p2[1] - p4[1];
+   t2[1] = p1sp3_1 - p2[0] + p4[0];
+   // col 3
+   t3[0] = p1ap3_0 - p2[0] - p4[0];
+   t3[1] = p1ap3_1 - p2[1] - p4[1];
+   // col 4
+   t4[0] = p1sp3_0 - p2[1] + p4[1];
+   t4[1] = p1sp3_1 + p2[0] - p4[0];
+   // col 1
+   *p1++ = p1ap3_0 + p2[0] + p4[0];
+   *p1++ = p1ap3_1 + p2[1] + p4[1];
+
+   // Twiddle factors are ones
+   *p2++ = t2[0];
+   *p2++ = t2[1];
+   *p3++ = t3[0];
+   *p3++ = t3[1];
+   *p4++ = t4[0];
+   *p4++ = t4[1];
+   
+   tw2 += twMod2;
+   tw3 += twMod3;
+   tw4 += twMod4;
+   
+   for (l = (L - 2) >> 1; l > 0; l-- ) 
+   {
+
+      // TOP
+      p1ap3_0 = p1[0] + p3[0];
+      p1sp3_0 = p1[0] - p3[0];
+      p1ap3_1 = p1[1] + p3[1];
+      p1sp3_1 = p1[1] - p3[1];
+      // col 2
+      t2[0] = p1sp3_0 + p2[1] - p4[1];
+      t2[1] = p1sp3_1 - p2[0] + p4[0];
+      // col 3
+      t3[0] = p1ap3_0 - p2[0] - p4[0];
+      t3[1] = p1ap3_1 - p2[1] - p4[1];
+      // col 4
+      t4[0] = p1sp3_0 - p2[1] + p4[1];
+      t4[1] = p1sp3_1 + p2[0] - p4[0];
+      // col 1 - top
+      *p1++ = p1ap3_0 + p2[0] + p4[0];
+      *p1++ = p1ap3_1 + p2[1] + p4[1];
+
+      // BOTTOM
+      p1ap3_1 = pEnd1[-1] + pEnd3[-1];
+      p1sp3_1 = pEnd1[-1] - pEnd3[-1];
+      p1ap3_0 = pEnd1[0] + pEnd3[0];
+      p1sp3_0 = pEnd1[0] - pEnd3[0];
+      // col 2
+      t2[2] = pEnd2[0]  - pEnd4[0] + p1sp3_1;
+      t2[3] = pEnd1[0] - pEnd3[0] - pEnd2[-1] + pEnd4[-1];
+      // col 3
+      t3[2] = p1ap3_1 - pEnd2[-1] - pEnd4[-1];
+      t3[3] = p1ap3_0 - pEnd2[0]  - pEnd4[0];
+      // col 4
+      t4[2] = pEnd2[0]  - pEnd4[0]  - p1sp3_1;
+      t4[3] = pEnd4[-1] - pEnd2[-1] - p1sp3_0;
+      // col 1 - Bottom
+      *pEnd1-- = p1ap3_0 + pEnd2[0] + pEnd4[0];
+      *pEnd1-- = p1ap3_1 + pEnd2[-1] + pEnd4[-1];
+
+      // COL 2
+      // read twiddle factors
+      twR = *tw2++;
+      twI = *tw2++;
+      // multiply by twiddle factors
+      //  let    Z1 = a + i(b),   Z2 = c + i(d)
+      //   =>  Z1 * Z2  =  (a*c - b*d) + i(b*c + a*d)
+      // Top
+      m0 = t2[0] * twR;
+      m1 = t2[1] * twI;
+      m2 = t2[1] * twR;
+      m3 = t2[0] * twI;
+      
+      *p2++ = m0 + m1;
+      *p2++ = m2 - m3;
+      // use vertical symmetry col 2
+      // 0.9997 - 0.0245i  <==>  0.0245 - 0.9997i
+      // Bottom
+      m0 = t2[3] * twI;
+      m1 = t2[2] * twR;
+      m2 = t2[2] * twI;
+      m3 = t2[3] * twR;
+      
+      *pEnd2-- = m0 - m1;
+      *pEnd2-- = m2 + m3;
+
+      // COL 3
+      twR = tw3[0];
+      twI = tw3[1];
+      tw3 += twMod3;
+      // Top
+      m0 = t3[0] * twR;
+      m1 = t3[1] * twI;
+      m2 = t3[1] * twR;
+      m3 = t3[0] * twI;
+      
+      *p3++ = m0 + m1;
+      *p3++ = m2 - m3;
+      // use vertical symmetry col 3
+      // 0.9988 - 0.0491i  <==>  -0.9988 - 0.0491i
+      // Bottom
+      m0 = -t3[3] * twR;
+      m1 = t3[2] * twI;
+      m2 = t3[2] * twR;
+      m3 = t3[3] * twI;
+      
+      *pEnd3-- = m0 - m1;
+      *pEnd3-- = m3 - m2;
+      
+      // COL 4
+      twR = tw4[0];
+      twI = tw4[1];
+      tw4 += twMod4;
+      // Top
+      m0 = t4[0] * twR;
+      m1 = t4[1] * twI;
+      m2 = t4[1] * twR;
+      m3 = t4[0] * twI;
+      
+      *p4++ = m0 + m1;
+      *p4++ = m2 - m3;
+      // use vertical symmetry col 4
+      // 0.9973 - 0.0736i  <==>  -0.0736 + 0.9973i
+      // Bottom
+      m0 = t4[3] * twI;
+      m1 = t4[2] * twR;
+      m2 = t4[2] * twI;
+      m3 = t4[3] * twR;
+      
+      *pEnd4-- = m0 - m1;
+      *pEnd4-- = m2 + m3;
+   }
+
+   //MIDDLE
+   // Twiddle factors are 
+   //  1.0000  0.7071-0.7071i  -1.0000i  -0.7071-0.7071i
+   p1ap3_0 = p1[0] + p3[0];
+   p1sp3_0 = p1[0] - p3[0];
+   p1ap3_1 = p1[1] + p3[1];
+   p1sp3_1 = p1[1] - p3[1];
+
+   // col 2
+   t2[0] = p1sp3_0 + p2[1] - p4[1];
+   t2[1] = p1sp3_1 - p2[0] + p4[0];
+   // col 3
+   t3[0] = p1ap3_0 - p2[0] - p4[0];
+   t3[1] = p1ap3_1 - p2[1] - p4[1];
+   // col 4
+   t4[0] = p1sp3_0 - p2[1] + p4[1];
+   t4[1] = p1sp3_1 + p2[0] - p4[0];
+   // col 1 - Top
+   *p1++ = p1ap3_0 + p2[0] + p4[0];
+   *p1++ = p1ap3_1 + p2[1] + p4[1];
+   
+   // COL 2
+   twR = tw2[0];
+   twI = tw2[1];
+   
+   m0 = t2[0] * twR;
+   m1 = t2[1] * twI;
+   m2 = t2[1] * twR;
+   m3 = t2[0] * twI;
+   
+   *p2++ = m0 + m1;
+   *p2++ = m2 - m3;
+      // COL 3
+   twR = tw3[0];
+   twI = tw3[1];
+   
+   m0 = t3[0] * twR;
+   m1 = t3[1] * twI;
+   m2 = t3[1] * twR;
+   m3 = t3[0] * twI;
+   
+   *p3++ = m0 + m1;
+   *p3++ = m2 - m3;
+   // COL 4
+   twR = tw4[0];
+   twI = tw4[1];
+   
+   m0 = t4[0] * twR;
+   m1 = t4[1] * twI;
+   m2 = t4[1] * twR;
+   m3 = t4[0] * twI;
+   
+   *p4++ = m0 + m1;
+   *p4++ = m2 - m3;
+
+   // first col
+   arm_radix8_butterfly_f32( pCol1, L, (float32_t *) S->pTwiddle, 4u);
+   // second col
+   arm_radix8_butterfly_f32( pCol2, L, (float32_t *) S->pTwiddle, 4u);
+   // third col
+   arm_radix8_butterfly_f32( pCol3, L, (float32_t *) S->pTwiddle, 4u);
+   // fourth col
+   arm_radix8_butterfly_f32( pCol4, L, (float32_t *) S->pTwiddle, 4u);
+
+}
+
+/**
+* @addtogroup ComplexFFT   
+* @{   
+*/
+
+/**   
+* @details   
+* @brief       Processing function for the floating-point complex FFT.
+* @param[in]      *S    points to an instance of the floating-point CFFT structure.  
+* @param[in, out] *p1   points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.  
+* @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.  
+* @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.  
+* @return none.  
+*/
+
+void arm_cfft_f32( 
+   const arm_cfft_instance_f32 * S, 
+   float32_t * p1,
+   uint8_t ifftFlag,
+   uint8_t bitReverseFlag)
+{
+
+   uint32_t  L = S->fftLen, l;
+   float32_t invL, * pSrc;
+
+  if(ifftFlag == 1u)
+  {
+	  /*  Conjugate input data  */
+	  pSrc = p1 + 1;
+	  for(l=0; l<L; l++) {
+		  *pSrc = -*pSrc;
+		   pSrc += 2;
+	  }
+  }
+
+		switch (L) {
+		case 16: 
+		case 128:
+		case 1024:
+			 arm_cfft_radix8by2_f32  ( (arm_cfft_instance_f32 *) S, p1);
+			 break;
+		case 32:
+		case 256:
+		case 2048:
+			 arm_cfft_radix8by4_f32  ( (arm_cfft_instance_f32 *) S, p1);
+			 break;
+		case 64:
+		case 512:
+		case 4096:
+          arm_radix8_butterfly_f32( p1, L, (float32_t *) S->pTwiddle, 1);
+			 break;
+		}  
+
+	if( bitReverseFlag )
+		arm_bitreversal_32((uint32_t*)p1,S->bitRevLength,S->pBitRevTable);
+
+  if(ifftFlag == 1u)
+  {
+	  invL = 1.0f/(float32_t)L;
+	  /*  Conjugate and scale output data */
+	  pSrc = p1;
+	  for(l=0; l<L; l++) {
+  		 *pSrc++ *=   invL ;
+		 *pSrc  = -(*pSrc) * invL;
+                 pSrc++;
+	  }
+  }
+}
+
--- a/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_radix2_f32.c
+++ b/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_radix2_f32.c
@ -0,0 +1,485 @@
+/* ----------------------------------------------------------------------   
+* Copyright (C) 2010-2013 ARM Limited. All rights reserved.   
+*   
+* $Date:        17. January 2013  
+* $Revision: 	V1.4.1  
+*   
+* Project: 	    CMSIS DSP Library   
+* Title:	    arm_cfft_radix2_f32.c   
+*   
+* Description:	Radix-2 Decimation in Frequency CFFT & CIFFT Floating point processing function   
+*   
+*   
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Redistribution and use in source and binary forms, with or without 
+* modification, are permitted provided that the following conditions
+* are met:
+*   - Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*   - Redistributions in binary form must reproduce the above copyright
+*     notice, this list of conditions and the following disclaimer in
+*     the documentation and/or other materials provided with the 
+*     distribution.
+*   - Neither the name of ARM LIMITED nor the names of its contributors
+*     may be used to endorse or promote products derived from this
+*     software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+* POSSIBILITY OF SUCH DAMAGE.   
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+void arm_radix2_butterfly_f32(
+  float32_t * pSrc,
+  uint32_t fftLen,
+  float32_t * pCoef,
+  uint16_t twidCoefModifier);
+
+void arm_radix2_butterfly_inverse_f32(
+  float32_t * pSrc,
+  uint32_t fftLen,
+  float32_t * pCoef,
+  uint16_t twidCoefModifier,
+  float32_t onebyfftLen);
+
+extern void arm_bitreversal_f32(
+    float32_t * pSrc,
+    uint16_t fftSize,
+    uint16_t bitRevFactor,
+    uint16_t * pBitRevTab);
+
+/**   
+* @ingroup groupTransforms   
+*/
+
+/**   
+* @addtogroup ComplexFFT   
+* @{   
+*/
+
+/**   
+* @details
+* @brief Radix-2 CFFT/CIFFT.
+* @deprecated Do not use this function.  It has been superceded by \ref arm_cfft_f32 and will be removed
+* in the future.
+* @param[in]      *S    points to an instance of the floating-point Radix-2 CFFT/CIFFT structure.  
+* @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.  
+* @return none.
+*/
+
+void arm_cfft_radix2_f32(
+const arm_cfft_radix2_instance_f32 * S,
+float32_t * pSrc)
+{
+
+   if(S->ifftFlag == 1u)
+   {
+      /*  Complex IFFT radix-2  */
+      arm_radix2_butterfly_inverse_f32(pSrc, S->fftLen, S->pTwiddle,
+      S->twidCoefModifier, S->onebyfftLen);
+   }
+   else
+   {
+      /*  Complex FFT radix-2  */
+      arm_radix2_butterfly_f32(pSrc, S->fftLen, S->pTwiddle,
+      S->twidCoefModifier);
+   }
+
+   if(S->bitReverseFlag == 1u)
+   {
+      /*  Bit Reversal */
+      arm_bitreversal_f32(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
+   }
+
+}
+
+
+/**    
+* @} end of ComplexFFT group    
+*/
+
+
+
+/* ----------------------------------------------------------------------    
+** Internal helper function used by the FFTs    
+** ------------------------------------------------------------------- */
+
+/*    
+* @brief  Core function for the floating-point CFFT butterfly process.   
+* @param[in, out] *pSrc            points to the in-place buffer of floating-point data type.   
+* @param[in]      fftLen           length of the FFT.   
+* @param[in]      *pCoef           points to the twiddle coefficient buffer.   
+* @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.   
+* @return none.   
+*/
+
+void arm_radix2_butterfly_f32(
+float32_t * pSrc,
+uint32_t fftLen,
+float32_t * pCoef,
+uint16_t twidCoefModifier)
+{
+
+   uint32_t i, j, k, l;
+   uint32_t n1, n2, ia;
+   float32_t xt, yt, cosVal, sinVal;
+   float32_t p0, p1, p2, p3;
+   float32_t a0, a1;
+
+#ifndef ARM_MATH_CM0_FAMILY
+
+   /*  Initializations for the first stage */
+   n2 = fftLen >> 1;
+   ia = 0;
+   i = 0;
+
+   // loop for groups 
+   for (k = n2; k > 0; k--)
+   {
+      cosVal = pCoef[ia * 2];
+      sinVal = pCoef[(ia * 2) + 1];
+
+      /*  Twiddle coefficients index modifier */
+      ia += twidCoefModifier;
+
+      /*  index calculation for the input as, */
+      /*  pSrc[i + 0], pSrc[i + fftLen/1] */
+      l = i + n2;
+
+      /*  Butterfly implementation */
+      a0 = pSrc[2 * i] + pSrc[2 * l];
+      xt = pSrc[2 * i] - pSrc[2 * l];
+
+      yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+      a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+      
+      p0 = xt * cosVal;
+      p1 = yt * sinVal;
+      p2 = yt * cosVal;
+      p3 = xt * sinVal;  
+      
+      pSrc[2 * i]     = a0;   
+      pSrc[2 * i + 1] = a1;       
+      
+      pSrc[2 * l]     = p0 + p1;
+      pSrc[2 * l + 1] = p2 - p3;
+      
+      i++;
+   }                             // groups loop end 
+
+   twidCoefModifier <<= 1u;
+
+   // loop for stage 
+   for (k = n2; k > 2; k = k >> 1)
+   {
+      n1 = n2;
+      n2 = n2 >> 1;
+      ia = 0;
+
+      // loop for groups 
+      j = 0;
+      do
+      {
+         cosVal = pCoef[ia * 2];
+         sinVal = pCoef[(ia * 2) + 1];
+         ia += twidCoefModifier;
+
+         // loop for butterfly 
+         i = j;
+         do
+         {
+            l = i + n2;
+            a0 = pSrc[2 * i] + pSrc[2 * l];
+            xt = pSrc[2 * i] - pSrc[2 * l];
+
+            yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+            a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+            
+            p0 = xt * cosVal;
+            p1 = yt * sinVal;
+            p2 = yt * cosVal;
+            p3 = xt * sinVal;  
+            
+            pSrc[2 * i] = a0;   
+            pSrc[2 * i + 1] = a1;       
+            
+            pSrc[2 * l]     = p0 + p1;
+            pSrc[2 * l + 1] = p2 - p3;
+            
+            i += n1;
+         } while( i < fftLen );                        // butterfly loop end 
+         j++;
+      } while( j < n2);                          // groups loop end 
+      twidCoefModifier <<= 1u;
+   }                             // stages loop end 
+
+   // loop for butterfly 
+   for (i = 0; i < fftLen; i += 2)
+   {
+      a0 = pSrc[2 * i] + pSrc[2 * i + 2];
+      xt = pSrc[2 * i] - pSrc[2 * i + 2];
+
+      yt = pSrc[2 * i + 1] - pSrc[2 * i + 3];
+      a1 = pSrc[2 * i + 3] + pSrc[2 * i + 1];
+      
+      pSrc[2 * i] = a0;   
+      pSrc[2 * i + 1] = a1;
+      pSrc[2 * i + 2] = xt;
+      pSrc[2 * i + 3] = yt;
+   }                             // groups loop end 
+
+#else
+ 
+   n2 = fftLen;
+
+   // loop for stage 
+   for (k = fftLen; k > 1; k = k >> 1)
+   {
+      n1 = n2;
+      n2 = n2 >> 1;
+      ia = 0;
+
+      // loop for groups 
+      j = 0;
+      do
+      {
+         cosVal = pCoef[ia * 2];
+         sinVal = pCoef[(ia * 2) + 1];
+         ia += twidCoefModifier;
+
+         // loop for butterfly 
+         i = j;
+         do
+         {
+            l = i + n2;
+            a0 = pSrc[2 * i] + pSrc[2 * l];
+            xt = pSrc[2 * i] - pSrc[2 * l];
+
+            yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+            a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+            
+            p0 = xt * cosVal;
+            p1 = yt * sinVal;
+            p2 = yt * cosVal;
+            p3 = xt * sinVal;  
+            
+            pSrc[2 * i] = a0;   
+            pSrc[2 * i + 1] = a1;       
+            
+            pSrc[2 * l]     = p0 + p1;
+            pSrc[2 * l + 1] = p2 - p3;
+            
+            i += n1;
+         } while(i < fftLen);
+         j++;
+      } while(j < n2);
+      twidCoefModifier <<= 1u;
+   }
+
+#endif //    #ifndef ARM_MATH_CM0_FAMILY
+
+}
+
+
+void arm_radix2_butterfly_inverse_f32(
+float32_t * pSrc,
+uint32_t fftLen,
+float32_t * pCoef,
+uint16_t twidCoefModifier,
+float32_t onebyfftLen)
+{
+
+   uint32_t i, j, k, l;
+   uint32_t n1, n2, ia;
+   float32_t xt, yt, cosVal, sinVal;
+   float32_t p0, p1, p2, p3;
+   float32_t a0, a1;
+
+#ifndef ARM_MATH_CM0_FAMILY
+
+   n2 = fftLen >> 1;
+   ia = 0;
+
+   // loop for groups 
+   for (i = 0; i < n2; i++)
+   {
+      cosVal = pCoef[ia * 2];
+      sinVal = pCoef[(ia * 2) + 1];
+      ia += twidCoefModifier;
+
+      l = i + n2;
+      a0 = pSrc[2 * i] + pSrc[2 * l];
+      xt = pSrc[2 * i] - pSrc[2 * l];
+
+      yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+      a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+      
+      p0 = xt * cosVal;
+      p1 = yt * sinVal;
+      p2 = yt * cosVal;
+      p3 = xt * sinVal;  
+      
+      pSrc[2 * i] = a0;   
+      pSrc[2 * i + 1] = a1;       
+      
+      pSrc[2 * l]     = p0 - p1;
+      pSrc[2 * l + 1] = p2 + p3;  
+   }                             // groups loop end 
+
+   twidCoefModifier <<= 1u;
+
+   // loop for stage 
+   for (k = fftLen / 2; k > 2; k = k >> 1)
+   {
+      n1 = n2;
+      n2 = n2 >> 1;
+      ia = 0;
+
+      // loop for groups 
+      j = 0;
+      do
+      {
+         cosVal = pCoef[ia * 2];
+         sinVal = pCoef[(ia * 2) + 1];
+         ia += twidCoefModifier;
+
+         // loop for butterfly 
+         i = j;
+         do
+         {
+            l = i + n2;
+            a0 = pSrc[2 * i] + pSrc[2 * l];
+            xt = pSrc[2 * i] - pSrc[2 * l];
+
+            yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+            a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+            
+            p0 = xt * cosVal;
+            p1 = yt * sinVal;
+            p2 = yt * cosVal;
+            p3 = xt * sinVal;  
+            
+            pSrc[2 * i] = a0;   
+            pSrc[2 * i + 1] = a1;       
+            
+            pSrc[2 * l]     = p0 - p1;
+            pSrc[2 * l + 1] = p2 + p3; 
+
+            i += n1;
+         } while( i < fftLen );                 // butterfly loop end 
+         j++;
+      } while(j < n2);                      // groups loop end 
+
+      twidCoefModifier <<= 1u;
+   }                             // stages loop end 
+
+   // loop for butterfly 
+   for (i = 0; i < fftLen; i += 2)
+   {   
+      a0 = pSrc[2 * i] + pSrc[2 * i + 2];
+      xt = pSrc[2 * i] - pSrc[2 * i + 2];
+      
+      a1 = pSrc[2 * i + 3] + pSrc[2 * i + 1];
+      yt = pSrc[2 * i + 1] - pSrc[2 * i + 3];
+      
+      p0 = a0 * onebyfftLen;
+      p2 = xt * onebyfftLen;
+      p1 = a1 * onebyfftLen;
+      p3 = yt * onebyfftLen; 
+      
+      pSrc[2 * i] = p0;
+      pSrc[2 * i + 1] = p1;  
+      pSrc[2 * i + 2] = p2;       
+      pSrc[2 * i + 3] = p3;
+   }                             // butterfly loop end 
+
+#else
+
+   n2 = fftLen;
+
+   // loop for stage 
+   for (k = fftLen; k > 2; k = k >> 1)
+   {
+      n1 = n2;
+      n2 = n2 >> 1;
+      ia = 0;
+
+      // loop for groups 
+      j = 0;
+      do
+      {
+         cosVal = pCoef[ia * 2];
+         sinVal = pCoef[(ia * 2) + 1];
+         ia = ia + twidCoefModifier;
+
+         // loop for butterfly 
+         i = j;
+         do
+         {
+            l = i + n2;
+            a0 = pSrc[2 * i] + pSrc[2 * l];
+            xt = pSrc[2 * i] - pSrc[2 * l];
+
+            yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+            a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+            
+            p0 = xt * cosVal;
+            p1 = yt * sinVal;
+            p2 = yt * cosVal;
+            p3 = xt * sinVal;  
+            
+            pSrc[2 * i] = a0;   
+            pSrc[2 * i + 1] = a1;       
+            
+            pSrc[2 * l]     = p0 - p1;
+            pSrc[2 * l + 1] = p2 + p3;  
+            
+            i += n1;
+         } while( i < fftLen );                    // butterfly loop end 
+         j++;
+      } while( j < n2 );                      // groups loop end 
+
+      twidCoefModifier = twidCoefModifier << 1u;
+   }                             // stages loop end 
+
+   n1 = n2;
+   n2 = n2 >> 1;
+
+   // loop for butterfly 
+   for (i = 0; i < fftLen; i += n1)
+   {
+      l = i + n2;
+      
+      a0 = pSrc[2 * i] + pSrc[2 * l];
+      xt = pSrc[2 * i] - pSrc[2 * l];
+      
+      a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
+      yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+      
+      p0 = a0 * onebyfftLen;
+      p2 = xt * onebyfftLen;
+      p1 = a1 * onebyfftLen;
+      p3 = yt * onebyfftLen; 
+      
+      pSrc[2 * i] = p0;
+      pSrc[2u * l] = p2;
+     
+      pSrc[2 * i + 1] = p1;    
+      pSrc[2u * l + 1u] = p3;
+   }                             // butterfly loop end 
+
+#endif //      #ifndef ARM_MATH_CM0_FAMILY
+
+}
--- a/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_radix2_init_f32.c
+++ b/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_radix2_init_f32.c
@ -0,0 +1,205 @@
+/* ----------------------------------------------------------------------   
+* Copyright (C) 2010-2013 ARM Limited. All rights reserved.   
+*   
+* $Date:        17. January 2013  
+* $Revision: 	V1.4.1  
+*   
+* Project: 	    CMSIS DSP Library   
+* Title:	    arm_cfft_radix4_init_f32.c   
+*   
+* Description:	Radix-4 Decimation in Frequency Floating-point CFFT & CIFFT Initialization function   
+*   
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Redistribution and use in source and binary forms, with or without 
+* modification, are permitted provided that the following conditions
+* are met:
+*   - Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*   - Redistributions in binary form must reproduce the above copyright
+*     notice, this list of conditions and the following disclaimer in
+*     the documentation and/or other materials provided with the 
+*     distribution.
+*   - Neither the name of ARM LIMITED nor the names of its contributors
+*     may be used to endorse or promote products derived from this
+*     software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+* POSSIBILITY OF SUCH DAMAGE.  
+* -------------------------------------------------------------------- */
+
+
+#include "arm_math.h"
+#include "arm_common_tables.h"
+
+/**   
+ * @ingroup groupTransforms   
+ */
+
+/**   
+ * @addtogroup ComplexFFT   
+ * @{   
+ */
+
+/**   
+* @brief  Initialization function for the floating-point CFFT/CIFFT.  
+* @deprecated Do not use this function.  It has been superceded by \ref arm_cfft_f32 and will be removed
+* in the future.
+* @param[in,out] *S             points to an instance of the floating-point CFFT/CIFFT structure.  
+* @param[in]     fftLen         length of the FFT.  
+* @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.  
+* @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.  
+* @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.  
+*   
+* \par Description:  
+* \par   
+* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.   
+* Set(=1) ifftFlag for calculation of CIFFT otherwise  CFFT is calculated  
+* \par   
+* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.   
+* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.   
+* \par   
+* The parameter <code>fftLen</code>	Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.   
+* \par   
+* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.   
+*/
+arm_status arm_cfft_radix2_init_f32(
+  arm_cfft_radix2_instance_f32 * S,
+  uint16_t fftLen,
+  uint8_t ifftFlag,
+  uint8_t bitReverseFlag)
+{
+  /*  Initialise the default arm status */
+  arm_status status = ARM_MATH_SUCCESS;
+
+  /*  Initialise the FFT length */
+  S->fftLen = fftLen;
+
+  /*  Initialise the Twiddle coefficient pointer */
+  S->pTwiddle = (float32_t *) twiddleCoef;
+
+  /*  Initialise the Flag for selection of CFFT or CIFFT */
+  S->ifftFlag = ifftFlag;
+
+  /*  Initialise the Flag for calculation Bit reversal or not */
+  S->bitReverseFlag = bitReverseFlag;
+
+  /*  Initializations of structure parameters depending on the FFT length */
+  switch (S->fftLen)
+  {
+
+  case 4096u:
+    /*  Initializations of structure parameters for 4096 point FFT */
+
+    /*  Initialise the twiddle coef modifier value */
+    S->twidCoefModifier = 1u;
+    /*  Initialise the bit reversal table modifier */
+    S->bitRevFactor = 1u;
+    /*  Initialise the bit reversal table pointer */
+    S->pBitRevTable = (uint16_t *) armBitRevTable;
+    /*  Initialise the 1/fftLen Value */
+    S->onebyfftLen = 0.000244140625;
+    break;
+
+  case 2048u:
+    /*  Initializations of structure parameters for 2048 point FFT */
+
+    /*  Initialise the twiddle coef modifier value */
+    S->twidCoefModifier = 2u;
+    /*  Initialise the bit reversal table modifier */
+    S->bitRevFactor = 2u;
+    /*  Initialise the bit reversal table pointer */
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[1];
+    /*  Initialise the 1/fftLen Value */
+    S->onebyfftLen = 0.00048828125;
+    break;
+
+  case 1024u:
+    /*  Initializations of structure parameters for 1024 point FFT */
+
+    /*  Initialise the twiddle coef modifier value */
+    S->twidCoefModifier = 4u;
+    /*  Initialise the bit reversal table modifier */
+    S->bitRevFactor = 4u;
+    /*  Initialise the bit reversal table pointer */
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
+    /*  Initialise the 1/fftLen Value */
+    S->onebyfftLen = 0.0009765625f;
+    break;
+
+  case 512u:
+    /*  Initializations of structure parameters for 512 point FFT */
+
+    /*  Initialise the twiddle coef modifier value */
+    S->twidCoefModifier = 8u;
+    /*  Initialise the bit reversal table modifier */
+    S->bitRevFactor = 8u;
+    /*  Initialise the bit reversal table pointer */
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[7];
+    /*  Initialise the 1/fftLen Value */
+    S->onebyfftLen = 0.001953125;
+    break;
+
+  case 256u:
+    /*  Initializations of structure parameters for 256 point FFT */
+    S->twidCoefModifier = 16u;
+    S->bitRevFactor = 16u;
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
+    S->onebyfftLen = 0.00390625f;
+    break;
+
+  case 128u:
+    /*  Initializations of structure parameters for 128 point FFT */
+    S->twidCoefModifier = 32u;
+    S->bitRevFactor = 32u;
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[31];
+    S->onebyfftLen = 0.0078125;
+    break;
+
+  case 64u:
+    /*  Initializations of structure parameters for 64 point FFT */
+    S->twidCoefModifier = 64u;
+    S->bitRevFactor = 64u;
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
+    S->onebyfftLen = 0.015625f;
+    break;
+
+  case 32u:
+    /*  Initializations of structure parameters for 64 point FFT */
+    S->twidCoefModifier = 128u;
+    S->bitRevFactor = 128u;
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[127];
+    S->onebyfftLen = 0.03125;
+    break;
+
+  case 16u:
+    /*  Initializations of structure parameters for 16 point FFT */
+    S->twidCoefModifier = 256u;
+    S->bitRevFactor = 256u;
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
+    S->onebyfftLen = 0.0625f;
+    break;
+
+
+  default:
+    /*  Reporting argument error if fftSize is not valid value */
+    status = ARM_MATH_ARGUMENT_ERROR;
+    break;
+  }
+
+  return (status);
+}
+
+/**   
+ * @} end of ComplexFFT group   
+ */
--- a/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_radix2_init_q15.c
+++ b/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_radix2_init_q15.c
@ -0,0 +1,188 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010-2013 ARM Limited. All rights reserved.    
+*    
+* $Date:        17. January 2013 
+* $Revision: 	V1.4.1   
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:	    arm_cfft_radix2_init_q15.c    
+*    
+* Description:	Radix-2 Decimation in Frequency Q15 FFT & IFFT initialization function    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Redistribution and use in source and binary forms, with or without 
+* modification, are permitted provided that the following conditions
+* are met:
+*   - Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*   - Redistributions in binary form must reproduce the above copyright
+*     notice, this list of conditions and the following disclaimer in
+*     the documentation and/or other materials provided with the 
+*     distribution.
+*   - Neither the name of ARM LIMITED nor the names of its contributors
+*     may be used to endorse or promote products derived from this
+*     software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+* POSSIBILITY OF SUCH DAMAGE.     
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+#include "arm_common_tables.h"
+
+/**    
+ * @ingroup groupTransforms    
+ */
+
+
+/**    
+ * @addtogroup ComplexFFT    
+ * @{    
+ */
+
+/**   
+* @brief Initialization function for the Q15 CFFT/CIFFT.   
+* @param[in,out] *S             points to an instance of the Q15 CFFT/CIFFT structure.   
+* @param[in]     fftLen         length of the FFT.   
+* @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.  
+* @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.  
+* @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.  
+*   
+* \par Description:  
+* \par   
+* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.   
+* Set(=1) ifftFlag for calculation of CIFFT otherwise  CFFT is calculated  
+* \par   
+* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.   
+* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.   
+* \par   
+* The parameter <code>fftLen</code>	Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.   
+* \par   
+* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.   
+*/
+
+arm_status arm_cfft_radix2_init_q15(
+  arm_cfft_radix2_instance_q15 * S,
+  uint16_t fftLen,
+  uint8_t ifftFlag,
+  uint8_t bitReverseFlag)
+{
+  /*  Initialise the default arm status */
+  arm_status status = ARM_MATH_SUCCESS;
+
+  /*  Initialise the FFT length */
+  S->fftLen = fftLen;
+
+  /*  Initialise the Twiddle coefficient pointer */
+  S->pTwiddle = (q15_t *) twiddleCoefQ15;
+  /*  Initialise the Flag for selection of CFFT or CIFFT */
+  S->ifftFlag = ifftFlag;
+  /*  Initialise the Flag for calculation Bit reversal or not */
+  S->bitReverseFlag = bitReverseFlag;
+
+  /*  Initializations of structure parameters depending on the FFT length */
+  switch (S->fftLen)
+  {
+  case 4096u:
+    /*  Initializations of structure parameters for 4096 point FFT */
+
+    /*  Initialise the twiddle coef modifier value */
+    S->twidCoefModifier = 1u;
+    /*  Initialise the bit reversal table modifier */
+    S->bitRevFactor = 1u;
+    /*  Initialise the bit reversal table pointer */
+    S->pBitRevTable = (uint16_t *) armBitRevTable;
+
+    break;
+
+  case 2048u:
+    /*  Initializations of structure parameters for 2048 point FFT */
+
+    /*  Initialise the twiddle coef modifier value */
+    S->twidCoefModifier = 2u;
+    /*  Initialise the bit reversal table modifier */
+    S->bitRevFactor = 2u;
+    /*  Initialise the bit reversal table pointer */
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[1];
+
+    break;
+
+  case 1024u:
+    /*  Initializations of structure parameters for 1024 point FFT */
+    S->twidCoefModifier = 4u;
+    S->bitRevFactor = 4u;
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
+
+    break;
+
+  case 512u:
+    /*  Initializations of structure parameters for 512 point FFT */
+    S->twidCoefModifier = 8u;
+    S->bitRevFactor = 8u;
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[7];
+
+    break;
+
+  case 256u:
+    /*  Initializations of structure parameters for 256 point FFT */
+    S->twidCoefModifier = 16u;
+    S->bitRevFactor = 16u;
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
+
+    break;
+
+  case 128u:
+    /*  Initializations of structure parameters for 128 point FFT */
+    S->twidCoefModifier = 32u;
+    S->bitRevFactor = 32u;
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[31];
+
+    break;
+
+  case 64u:
+    /*  Initializations of structure parameters for 64 point FFT */
+    S->twidCoefModifier = 64u;
+    S->bitRevFactor = 64u;
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
+
+    break;
+
+  case 32u:
+    /*  Initializations of structure parameters for 32 point FFT */
+    S->twidCoefModifier = 128u;
+    S->bitRevFactor = 128u;
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[127];
+
+    break;
+
+  case 16u:
+    /*  Initializations of structure parameters for 16 point FFT */
+    S->twidCoefModifier = 256u;
+    S->bitRevFactor = 256u;
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
+
+    break;
+
+  default:
+    /*  Reporting argument error if fftSize is not valid value */
+    status = ARM_MATH_ARGUMENT_ERROR;
+    break;
+  }
+
+  return (status);
+}
+
+/**    
+ * @} end of ComplexFFT group    
+ */
--- a/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_radix2_init_q31.c
+++ b/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_radix2_init_q31.c
@ -0,0 +1,186 @@
+/* ----------------------------------------------------------------------   
+* Copyright (C) 2010-2013 ARM Limited. All rights reserved.   
+*   
+* $Date:        17. January 2013
+* $Revision: 	V1.4.1  
+*   
+* Project: 	    CMSIS DSP Library   
+* Title:	    arm_cfft_radix2_init_q31.c   
+*   
+* Description:	Radix-2 Decimation in Frequency Fixed-point CFFT & CIFFT Initialization function   
+*   
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Redistribution and use in source and binary forms, with or without 
+* modification, are permitted provided that the following conditions
+* are met:
+*   - Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*   - Redistributions in binary form must reproduce the above copyright
+*     notice, this list of conditions and the following disclaimer in
+*     the documentation and/or other materials provided with the 
+*     distribution.
+*   - Neither the name of ARM LIMITED nor the names of its contributors
+*     may be used to endorse or promote products derived from this
+*     software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+* POSSIBILITY OF SUCH DAMAGE.  
+* -------------------------------------------------------------------- */
+
+
+#include "arm_math.h"
+#include "arm_common_tables.h"
+
+/**   
+ * @ingroup groupTransforms   
+ */
+
+/**   
+ * @addtogroup ComplexFFT   
+ * @{   
+ */
+
+
+/**    
+*    
+* @brief  Initialization function for the Q31 CFFT/CIFFT.   
+* @param[in,out] *S             points to an instance of the Q31 CFFT/CIFFT structure.   
+* @param[in]     fftLen         length of the FFT.  
+* @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.  
+* @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.  
+* @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.  
+*   
+* \par Description:  
+* \par   
+* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.   
+* Set(=1) ifftFlag for calculation of CIFFT otherwise  CFFT is calculated  
+* \par   
+* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.   
+* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.   
+* \par   
+* The parameter <code>fftLen</code>	Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.   
+* \par   
+* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.   
+*/
+
+arm_status arm_cfft_radix2_init_q31(
+  arm_cfft_radix2_instance_q31 * S,
+  uint16_t fftLen,
+  uint8_t ifftFlag,
+  uint8_t bitReverseFlag)
+{
+  /*  Initialise the default arm status */
+  arm_status status = ARM_MATH_SUCCESS;
+
+  /*  Initialise the FFT length */
+  S->fftLen = fftLen;
+
+  /*  Initialise the Twiddle coefficient pointer */
+  S->pTwiddle = (q31_t *) twiddleCoefQ31;
+  /*  Initialise the Flag for selection of CFFT or CIFFT */
+  S->ifftFlag = ifftFlag;
+  /*  Initialise the Flag for calculation Bit reversal or not */
+  S->bitReverseFlag = bitReverseFlag;
+
+  /*  Initializations of Instance structure depending on the FFT length */
+  switch (S->fftLen)
+  {
+    /*  Initializations of structure parameters for 4096 point FFT */
+  case 4096u:
+    /*  Initialise the twiddle coef modifier value */
+    S->twidCoefModifier = 1u;
+    /*  Initialise the bit reversal table modifier */
+    S->bitRevFactor = 1u;
+    /*  Initialise the bit reversal table pointer */
+    S->pBitRevTable = (uint16_t *) armBitRevTable;
+    break;
+
+    /*  Initializations of structure parameters for 2048 point FFT */
+  case 2048u:
+    /*  Initialise the twiddle coef modifier value */
+    S->twidCoefModifier = 2u;
+    /*  Initialise the bit reversal table modifier */
+    S->bitRevFactor = 2u;
+    /*  Initialise the bit reversal table pointer */
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[1];
+    break;
+
+    /*  Initializations of structure parameters for 1024 point FFT */
+  case 1024u:
+    /*  Initialise the twiddle coef modifier value */
+    S->twidCoefModifier = 4u;
+    /*  Initialise the bit reversal table modifier */
+    S->bitRevFactor = 4u;
+    /*  Initialise the bit reversal table pointer */
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
+    break;
+
+    /*  Initializations of structure parameters for 512 point FFT */
+  case 512u:
+    /*  Initialise the twiddle coef modifier value */
+    S->twidCoefModifier = 8u;
+    /*  Initialise the bit reversal table modifier */
+    S->bitRevFactor = 8u;
+    /*  Initialise the bit reversal table pointer */
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[7];
+    break;
+
+  case 256u:
+    /*  Initializations of structure parameters for 256 point FFT */
+    S->twidCoefModifier = 16u;
+    S->bitRevFactor = 16u;
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
+    break;
+
+  case 128u:
+    /*  Initializations of structure parameters for 128 point FFT */
+    S->twidCoefModifier = 32u;
+    S->bitRevFactor = 32u;
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[31];
+    break;
+
+  case 64u:
+    /*  Initializations of structure parameters for 64 point FFT */
+    S->twidCoefModifier = 64u;
+    S->bitRevFactor = 64u;
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
+    break;
+
+  case 32u:
+    /*  Initializations of structure parameters for 32 point FFT */
+    S->twidCoefModifier = 128u;
+    S->bitRevFactor = 128u;
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[127];
+    break;
+
+  case 16u:
+    /*  Initializations of structure parameters for 16 point FFT */
+    S->twidCoefModifier = 256u;
+    S->bitRevFactor = 256u;
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
+    break;
+
+
+  default:
+    /*  Reporting argument error if fftSize is not valid value */
+    status = ARM_MATH_ARGUMENT_ERROR;
+    break;
+  }
+
+  return (status);
+}
+
+/**   
+ * @} end of ComplexFFT group   
+ */
--- a/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_radix2_q15.c
+++ b/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_radix2_q15.c
@ -0,0 +1,741 @@
+/* ----------------------------------------------------------------------   
+* Copyright (C) 2010-2013 ARM Limited. All rights reserved.   
+*   
+* $Date:        17. January 2013  
+* $Revision: 	V1.4.1  
+*   
+* Project: 	    CMSIS DSP Library   
+* Title:	    arm_cfft_radix2_q15.c   
+*   
+* Description:	Radix-2 Decimation in Frequency CFFT & CIFFT Fixed point processing function   
+*   
+*   
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Redistribution and use in source and binary forms, with or without 
+* modification, are permitted provided that the following conditions
+* are met:
+*   - Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*   - Redistributions in binary form must reproduce the above copyright
+*     notice, this list of conditions and the following disclaimer in
+*     the documentation and/or other materials provided with the 
+*     distribution.
+*   - Neither the name of ARM LIMITED nor the names of its contributors
+*     may be used to endorse or promote products derived from this
+*     software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+* POSSIBILITY OF SUCH DAMAGE.   
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+void arm_radix2_butterfly_q15(
+  q15_t * pSrc,
+  uint32_t fftLen,
+  q15_t * pCoef,
+  uint16_t twidCoefModifier);
+
+void arm_radix2_butterfly_inverse_q15(
+  q15_t * pSrc,
+  uint32_t fftLen,
+  q15_t * pCoef,
+  uint16_t twidCoefModifier);
+	
+void arm_bitreversal_q15(
+  q15_t * pSrc,
+  uint32_t fftLen,
+  uint16_t bitRevFactor,
+  uint16_t * pBitRevTab);
+
+/**   
+ * @ingroup groupTransforms   
+ */
+
+/**   
+ * @addtogroup ComplexFFT   
+ * @{   
+ */
+
+/**   
+ * @details   
+ * @brief Processing function for the fixed-point CFFT/CIFFT.  
+ * @param[in]      *S    points to an instance of the fixed-point CFFT/CIFFT structure.  
+ * @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.  
+ * @return none.  
+ */
+
+void arm_cfft_radix2_q15(
+  const arm_cfft_radix2_instance_q15 * S,
+  q15_t * pSrc)
+{
+
+  if(S->ifftFlag == 1u)
+  {
+    arm_radix2_butterfly_inverse_q15(pSrc, S->fftLen,
+                                     S->pTwiddle, S->twidCoefModifier);
+  }
+  else
+  {
+    arm_radix2_butterfly_q15(pSrc, S->fftLen,
+                             S->pTwiddle, S->twidCoefModifier);
+  }
+
+  arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
+}
+
+/**   
+ * @} end of ComplexFFT group   
+ */
+
+void arm_radix2_butterfly_q15(
+  q15_t * pSrc,
+  uint32_t fftLen,
+  q15_t * pCoef,
+  uint16_t twidCoefModifier)
+{
+#ifndef ARM_MATH_CM0_FAMILY
+
+  unsigned i, j, k, l;
+  unsigned n1, n2, ia;
+  q15_t in;
+  q31_t T, S, R;
+  q31_t coeff, out1, out2;
+
+  //N = fftLen; 
+  n2 = fftLen;
+
+  n1 = n2;
+  n2 = n2 >> 1;
+  ia = 0;
+
+  // loop for groups 
+  for (i = 0; i < n2; i++)
+  {
+    coeff = _SIMD32_OFFSET(pCoef + (ia * 2u));
+
+    ia = ia + twidCoefModifier;
+
+    l = i + n2;
+
+    T = _SIMD32_OFFSET(pSrc + (2 * i));
+    in = ((int16_t) (T & 0xFFFF)) >> 2;
+    T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
+
+    S = _SIMD32_OFFSET(pSrc + (2 * l));
+    in = ((int16_t) (S & 0xFFFF)) >> 2;
+    S = ((S >> 2) & 0xFFFF0000) | (in & 0xFFFF);
+
+    R = __QSUB16(T, S);
+
+    _SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
+
+#ifndef ARM_MATH_BIG_ENDIAN
+
+    out1 = __SMUAD(coeff, R) >> 16;
+    out2 = __SMUSDX(coeff, R);
+
+#else
+
+    out1 = __SMUSDX(R, coeff) >> 16u;
+    out2 = __SMUAD(coeff, R);
+
+#endif //     #ifndef ARM_MATH_BIG_ENDIAN
+
+    _SIMD32_OFFSET(pSrc + (2u * l)) =
+      (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
+
+    coeff = _SIMD32_OFFSET(pCoef + (ia * 2u));
+
+    ia = ia + twidCoefModifier;
+
+    // loop for butterfly 
+    i++;
+    l++;
+
+    T = _SIMD32_OFFSET(pSrc + (2 * i));
+    in = ((int16_t) (T & 0xFFFF)) >> 2;
+    T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
+
+    S = _SIMD32_OFFSET(pSrc + (2 * l));
+    in = ((int16_t) (S & 0xFFFF)) >> 2;
+    S = ((S >> 2) & 0xFFFF0000) | (in & 0xFFFF);
+
+    R = __QSUB16(T, S);
+
+    _SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
+
+#ifndef ARM_MATH_BIG_ENDIAN
+
+    out1 = __SMUAD(coeff, R) >> 16;
+    out2 = __SMUSDX(coeff, R);
+
+#else
+
+    out1 = __SMUSDX(R, coeff) >> 16u;
+    out2 = __SMUAD(coeff, R);
+
+#endif //     #ifndef ARM_MATH_BIG_ENDIAN
+
+    _SIMD32_OFFSET(pSrc + (2u * l)) =
+      (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
+
+  }                             // groups loop end 
+
+  twidCoefModifier = twidCoefModifier << 1u;
+
+  // loop for stage 
+  for (k = fftLen / 2; k > 2; k = k >> 1)
+  {
+    n1 = n2;
+    n2 = n2 >> 1;
+    ia = 0;
+
+    // loop for groups 
+    for (j = 0; j < n2; j++)
+    {
+      coeff = _SIMD32_OFFSET(pCoef + (ia * 2u));
+
+      ia = ia + twidCoefModifier;
+
+      // loop for butterfly 
+      for (i = j; i < fftLen; i += n1)
+      {
+        l = i + n2;
+
+        T = _SIMD32_OFFSET(pSrc + (2 * i));
+
+        S = _SIMD32_OFFSET(pSrc + (2 * l));
+
+        R = __QSUB16(T, S);
+
+        _SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
+
+#ifndef ARM_MATH_BIG_ENDIAN
+
+        out1 = __SMUAD(coeff, R) >> 16;
+        out2 = __SMUSDX(coeff, R);
+
+#else
+
+        out1 = __SMUSDX(R, coeff) >> 16u;
+        out2 = __SMUAD(coeff, R);
+
+#endif //     #ifndef ARM_MATH_BIG_ENDIAN
+
+        _SIMD32_OFFSET(pSrc + (2u * l)) =
+          (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
+
+        i += n1;
+
+        l = i + n2;
+
+        T = _SIMD32_OFFSET(pSrc + (2 * i));
+
+        S = _SIMD32_OFFSET(pSrc + (2 * l));
+
+        R = __QSUB16(T, S);
+
+        _SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
+
+#ifndef ARM_MATH_BIG_ENDIAN
+
+        out1 = __SMUAD(coeff, R) >> 16;
+        out2 = __SMUSDX(coeff, R);
+
+#else
+
+        out1 = __SMUSDX(R, coeff) >> 16u;
+        out2 = __SMUAD(coeff, R);
+
+#endif //     #ifndef ARM_MATH_BIG_ENDIAN
+
+        _SIMD32_OFFSET(pSrc + (2u * l)) =
+          (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
+
+      }                         // butterfly loop end 
+
+    }                           // groups loop end 
+
+    twidCoefModifier = twidCoefModifier << 1u;
+  }                             // stages loop end 
+
+  n1 = n2;
+  n2 = n2 >> 1;
+  ia = 0;
+
+  coeff = _SIMD32_OFFSET(pCoef + (ia * 2u));
+
+  ia = ia + twidCoefModifier;
+
+  // loop for butterfly 
+  for (i = 0; i < fftLen; i += n1)
+  {
+    l = i + n2;
+
+    T = _SIMD32_OFFSET(pSrc + (2 * i));
+
+    S = _SIMD32_OFFSET(pSrc + (2 * l));
+
+    R = __QSUB16(T, S);
+
+    _SIMD32_OFFSET(pSrc + (2 * i)) = __QADD16(T, S);
+
+    _SIMD32_OFFSET(pSrc + (2u * l)) = R;
+
+    i += n1;
+    l = i + n2;
+
+    T = _SIMD32_OFFSET(pSrc + (2 * i));
+
+    S = _SIMD32_OFFSET(pSrc + (2 * l));
+
+    R = __QSUB16(T, S);
+
+    _SIMD32_OFFSET(pSrc + (2 * i)) = __QADD16(T, S);
+
+    _SIMD32_OFFSET(pSrc + (2u * l)) = R;
+
+  }                             // groups loop end 
+
+
+#else
+
+  unsigned i, j, k, l;
+  unsigned n1, n2, ia;
+  q15_t xt, yt, cosVal, sinVal;
+
+
+  //N = fftLen; 
+  n2 = fftLen;
+
+  n1 = n2;
+  n2 = n2 >> 1;
+  ia = 0;
+
+  // loop for groups 
+  for (j = 0; j < n2; j++)
+  {
+    cosVal = pCoef[ia * 2];
+    sinVal = pCoef[(ia * 2) + 1];
+    ia = ia + twidCoefModifier;
+
+    // loop for butterfly 
+    for (i = j; i < fftLen; i += n1)
+    {
+      l = i + n2;
+      xt = (pSrc[2 * i] >> 2u) - (pSrc[2 * l] >> 2u);
+      pSrc[2 * i] = ((pSrc[2 * i] >> 2u) + (pSrc[2 * l] >> 2u)) >> 1u;
+
+      yt = (pSrc[2 * i + 1] >> 2u) - (pSrc[2 * l + 1] >> 2u);
+      pSrc[2 * i + 1] =
+        ((pSrc[2 * l + 1] >> 2u) + (pSrc[2 * i + 1] >> 2u)) >> 1u;
+
+      pSrc[2u * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) +
+                      ((int16_t) (((q31_t) yt * sinVal) >> 16)));
+
+      pSrc[2u * l + 1u] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) -
+                           ((int16_t) (((q31_t) xt * sinVal) >> 16)));
+
+    }                           // butterfly loop end 
+
+  }                             // groups loop end 
+
+  twidCoefModifier = twidCoefModifier << 1u;
+
+  // loop for stage 
+  for (k = fftLen / 2; k > 2; k = k >> 1)
+  {
+    n1 = n2;
+    n2 = n2 >> 1;
+    ia = 0;
+
+    // loop for groups 
+    for (j = 0; j < n2; j++)
+    {
+      cosVal = pCoef[ia * 2];
+      sinVal = pCoef[(ia * 2) + 1];
+      ia = ia + twidCoefModifier;
+
+      // loop for butterfly 
+      for (i = j; i < fftLen; i += n1)
+      {
+        l = i + n2;
+        xt = pSrc[2 * i] - pSrc[2 * l];
+        pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) >> 1u;
+
+        yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+        pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1u;
+
+        pSrc[2u * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) +
+                        ((int16_t) (((q31_t) yt * sinVal) >> 16)));
+
+        pSrc[2u * l + 1u] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) -
+                             ((int16_t) (((q31_t) xt * sinVal) >> 16)));
+
+      }                         // butterfly loop end 
+
+    }                           // groups loop end 
+
+    twidCoefModifier = twidCoefModifier << 1u;
+  }                             // stages loop end 
+
+  n1 = n2;
+  n2 = n2 >> 1;
+  ia = 0;
+
+  // loop for groups 
+  for (j = 0; j < n2; j++)
+  {
+    cosVal = pCoef[ia * 2];
+    sinVal = pCoef[(ia * 2) + 1];
+
+    ia = ia + twidCoefModifier;
+
+    // loop for butterfly 
+    for (i = j; i < fftLen; i += n1)
+    {
+      l = i + n2;
+      xt = pSrc[2 * i] - pSrc[2 * l];
+      pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
+
+      yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+      pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
+
+      pSrc[2u * l] = xt;
+
+      pSrc[2u * l + 1u] = yt;
+
+    }                           // butterfly loop end 
+
+  }                             // groups loop end 
+
+  twidCoefModifier = twidCoefModifier << 1u;
+
+#endif //             #ifndef ARM_MATH_CM0_FAMILY
+
+}
+
+
+void arm_radix2_butterfly_inverse_q15(
+  q15_t * pSrc,
+  uint32_t fftLen,
+  q15_t * pCoef,
+  uint16_t twidCoefModifier)
+{
+#ifndef ARM_MATH_CM0_FAMILY
+
+  unsigned i, j, k, l;
+  unsigned n1, n2, ia;
+  q15_t in;
+  q31_t T, S, R;
+  q31_t coeff, out1, out2;
+
+  //N = fftLen; 
+  n2 = fftLen;
+
+  n1 = n2;
+  n2 = n2 >> 1;
+  ia = 0;
+
+  // loop for groups 
+  for (i = 0; i < n2; i++)
+  {
+    coeff = _SIMD32_OFFSET(pCoef + (ia * 2u));
+
+    ia = ia + twidCoefModifier;
+
+    l = i + n2;
+
+    T = _SIMD32_OFFSET(pSrc + (2 * i));
+    in = ((int16_t) (T & 0xFFFF)) >> 2;
+    T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
+
+    S = _SIMD32_OFFSET(pSrc + (2 * l));
+    in = ((int16_t) (S & 0xFFFF)) >> 2;
+    S = ((S >> 2) & 0xFFFF0000) | (in & 0xFFFF);
+
+    R = __QSUB16(T, S);
+
+    _SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
+
+#ifndef ARM_MATH_BIG_ENDIAN
+
+    out1 = __SMUSD(coeff, R) >> 16;
+    out2 = __SMUADX(coeff, R);
+#else
+
+    out1 = __SMUADX(R, coeff) >> 16u;
+    out2 = __SMUSD(__QSUB(0, coeff), R);
+
+#endif //     #ifndef ARM_MATH_BIG_ENDIAN
+
+    _SIMD32_OFFSET(pSrc + (2u * l)) =
+      (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
+
+    coeff = _SIMD32_OFFSET(pCoef + (ia * 2u));
+
+    ia = ia + twidCoefModifier;
+
+    // loop for butterfly 
+    i++;
+    l++;
+
+    T = _SIMD32_OFFSET(pSrc + (2 * i));
+    in = ((int16_t) (T & 0xFFFF)) >> 2;
+    T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
+
+    S = _SIMD32_OFFSET(pSrc + (2 * l));
+    in = ((int16_t) (S & 0xFFFF)) >> 2;
+    S = ((S >> 2) & 0xFFFF0000) | (in & 0xFFFF);
+
+    R = __QSUB16(T, S);
+
+    _SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
+
+#ifndef ARM_MATH_BIG_ENDIAN
+
+    out1 = __SMUSD(coeff, R) >> 16;
+    out2 = __SMUADX(coeff, R);
+#else
+
+    out1 = __SMUADX(R, coeff) >> 16u;
+    out2 = __SMUSD(__QSUB(0, coeff), R);
+
+#endif //     #ifndef ARM_MATH_BIG_ENDIAN
+
+    _SIMD32_OFFSET(pSrc + (2u * l)) =
+      (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
+
+  }                             // groups loop end 
+
+  twidCoefModifier = twidCoefModifier << 1u;
+
+  // loop for stage 
+  for (k = fftLen / 2; k > 2; k = k >> 1)
+  {
+    n1 = n2;
+    n2 = n2 >> 1;
+    ia = 0;
+
+    // loop for groups 
+    for (j = 0; j < n2; j++)
+    {
+      coeff = _SIMD32_OFFSET(pCoef + (ia * 2u));
+
+      ia = ia + twidCoefModifier;
+
+      // loop for butterfly 
+      for (i = j; i < fftLen; i += n1)
+      {
+        l = i + n2;
+
+        T = _SIMD32_OFFSET(pSrc + (2 * i));
+
+        S = _SIMD32_OFFSET(pSrc + (2 * l));
+
+        R = __QSUB16(T, S);
+
+        _SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
+
+#ifndef ARM_MATH_BIG_ENDIAN
+
+        out1 = __SMUSD(coeff, R) >> 16;
+        out2 = __SMUADX(coeff, R);
+
+#else
+
+        out1 = __SMUADX(R, coeff) >> 16u;
+        out2 = __SMUSD(__QSUB(0, coeff), R);
+
+#endif //     #ifndef ARM_MATH_BIG_ENDIAN
+
+        _SIMD32_OFFSET(pSrc + (2u * l)) =
+          (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
+
+        i += n1;
+
+        l = i + n2;
+
+        T = _SIMD32_OFFSET(pSrc + (2 * i));
+
+        S = _SIMD32_OFFSET(pSrc + (2 * l));
+
+        R = __QSUB16(T, S);
+
+        _SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
+
+#ifndef ARM_MATH_BIG_ENDIAN
+
+        out1 = __SMUSD(coeff, R) >> 16;
+        out2 = __SMUADX(coeff, R);
+#else
+
+        out1 = __SMUADX(R, coeff) >> 16u;
+        out2 = __SMUSD(__QSUB(0, coeff), R);
+
+#endif //     #ifndef ARM_MATH_BIG_ENDIAN
+
+        _SIMD32_OFFSET(pSrc + (2u * l)) =
+          (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
+
+      }                         // butterfly loop end 
+
+    }                           // groups loop end 
+
+    twidCoefModifier = twidCoefModifier << 1u;
+  }                             // stages loop end 
+
+  n1 = n2;
+  n2 = n2 >> 1;
+  ia = 0;
+
+  // loop for groups 
+  for (j = 0; j < n2; j++)
+  {
+    coeff = _SIMD32_OFFSET(pCoef + (ia * 2u));
+
+    ia = ia + twidCoefModifier;
+
+    // loop for butterfly 
+    for (i = j; i < fftLen; i += n1)
+    {
+      l = i + n2;
+
+      T = _SIMD32_OFFSET(pSrc + (2 * i));
+
+      S = _SIMD32_OFFSET(pSrc + (2 * l));
+
+      R = __QSUB16(T, S);
+
+      _SIMD32_OFFSET(pSrc + (2 * i)) = __QADD16(T, S);
+
+      _SIMD32_OFFSET(pSrc + (2u * l)) = R;
+
+    }                           // butterfly loop end 
+
+  }                             // groups loop end 
+
+  twidCoefModifier = twidCoefModifier << 1u;
+
+#else
+
+
+  unsigned i, j, k, l;
+  unsigned n1, n2, ia;
+  q15_t xt, yt, cosVal, sinVal;
+
+  //N = fftLen; 
+  n2 = fftLen;
+
+  n1 = n2;
+  n2 = n2 >> 1;
+  ia = 0;
+
+  // loop for groups 
+  for (j = 0; j < n2; j++)
+  {
+    cosVal = pCoef[ia * 2];
+    sinVal = pCoef[(ia * 2) + 1];
+    ia = ia + twidCoefModifier;
+
+    // loop for butterfly 
+    for (i = j; i < fftLen; i += n1)
+    {
+      l = i + n2;
+      xt = (pSrc[2 * i] >> 2u) - (pSrc[2 * l] >> 2u);
+      pSrc[2 * i] = ((pSrc[2 * i] >> 2u) + (pSrc[2 * l] >> 2u)) >> 1u;
+
+      yt = (pSrc[2 * i + 1] >> 2u) - (pSrc[2 * l + 1] >> 2u);
+      pSrc[2 * i + 1] =
+        ((pSrc[2 * l + 1] >> 2u) + (pSrc[2 * i + 1] >> 2u)) >> 1u;
+
+      pSrc[2u * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) -
+                      ((int16_t) (((q31_t) yt * sinVal) >> 16)));
+
+      pSrc[2u * l + 1u] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) +
+                           ((int16_t) (((q31_t) xt * sinVal) >> 16)));
+
+    }                           // butterfly loop end 
+
+  }                             // groups loop end 
+
+  twidCoefModifier = twidCoefModifier << 1u;
+
+  // loop for stage 
+  for (k = fftLen / 2; k > 2; k = k >> 1)
+  {
+    n1 = n2;
+    n2 = n2 >> 1;
+    ia = 0;
+
+    // loop for groups 
+    for (j = 0; j < n2; j++)
+    {
+      cosVal = pCoef[ia * 2];
+      sinVal = pCoef[(ia * 2) + 1];
+      ia = ia + twidCoefModifier;
+
+      // loop for butterfly 
+      for (i = j; i < fftLen; i += n1)
+      {
+        l = i + n2;
+        xt = pSrc[2 * i] - pSrc[2 * l];
+        pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) >> 1u;
+
+        yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+        pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1u;
+
+        pSrc[2u * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) -
+                        ((int16_t) (((q31_t) yt * sinVal) >> 16)));
+
+        pSrc[2u * l + 1u] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) +
+                             ((int16_t) (((q31_t) xt * sinVal) >> 16)));
+
+      }                         // butterfly loop end 
+
+    }                           // groups loop end 
+
+    twidCoefModifier = twidCoefModifier << 1u;
+  }                             // stages loop end 
+
+  n1 = n2;
+  n2 = n2 >> 1;
+  ia = 0;
+
+  cosVal = pCoef[ia * 2];
+  sinVal = pCoef[(ia * 2) + 1];
+
+  ia = ia + twidCoefModifier;
+
+  // loop for butterfly 
+  for (i = 0; i < fftLen; i += n1)
+  {
+    l = i + n2;
+    xt = pSrc[2 * i] - pSrc[2 * l];
+    pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
+
+    yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+    pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
+
+    pSrc[2u * l] = xt;
+
+    pSrc[2u * l + 1u] = yt;
+
+  }                             // groups loop end 
+
+
+#endif //             #ifndef ARM_MATH_CM0_FAMILY
+
+}
--- a/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_radix2_q31.c
+++ b/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_radix2_q31.c
@ -0,0 +1,350 @@
+/* ----------------------------------------------------------------------   
+* Copyright (C) 2010-2013 ARM Limited. All rights reserved.   
+*   
+* $Date:        17. January 2013  
+* $Revision: 	V1.4.1  
+*   
+* Project: 	    CMSIS DSP Library   
+* Title:	    arm_cfft_radix2_q31.c   
+*   
+* Description:	Radix-2 Decimation in Frequency CFFT & CIFFT Fixed point processing function   
+*   
+*   
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Redistribution and use in source and binary forms, with or without 
+* modification, are permitted provided that the following conditions
+* are met:
+*   - Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*   - Redistributions in binary form must reproduce the above copyright
+*     notice, this list of conditions and the following disclaimer in
+*     the documentation and/or other materials provided with the 
+*     distribution.
+*   - Neither the name of ARM LIMITED nor the names of its contributors
+*     may be used to endorse or promote products derived from this
+*     software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+* POSSIBILITY OF SUCH DAMAGE.    
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+void arm_radix2_butterfly_q31(
+  q31_t * pSrc,
+  uint32_t fftLen,
+  q31_t * pCoef,
+  uint16_t twidCoefModifier);
+
+void arm_radix2_butterfly_inverse_q31(
+  q31_t * pSrc,
+  uint32_t fftLen,
+  q31_t * pCoef,
+  uint16_t twidCoefModifier);
+
+void arm_bitreversal_q31(
+  q31_t * pSrc,
+  uint32_t fftLen,
+  uint16_t bitRevFactor,
+  uint16_t * pBitRevTab);
+
+/**   
+* @ingroup groupTransforms   
+*/
+
+/**   
+* @addtogroup ComplexFFT   
+* @{   
+*/
+
+/**   
+* @details   
+* @brief Processing function for the fixed-point CFFT/CIFFT.  
+* @param[in]      *S    points to an instance of the fixed-point CFFT/CIFFT structure.  
+* @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.  
+* @return none.  
+*/
+
+void arm_cfft_radix2_q31(
+const arm_cfft_radix2_instance_q31 * S,
+q31_t * pSrc)
+{
+
+   if(S->ifftFlag == 1u)
+   {
+      arm_radix2_butterfly_inverse_q31(pSrc, S->fftLen,
+      S->pTwiddle, S->twidCoefModifier);
+   }
+   else
+   {
+      arm_radix2_butterfly_q31(pSrc, S->fftLen,
+      S->pTwiddle, S->twidCoefModifier);
+   }
+
+   arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
+}
+
+/**   
+* @} end of ComplexFFT group   
+*/
+
+void arm_radix2_butterfly_q31(
+q31_t * pSrc,
+uint32_t fftLen,
+q31_t * pCoef,
+uint16_t twidCoefModifier)
+{
+
+   unsigned i, j, k, l, m;
+   unsigned n1, n2, ia;
+   q31_t xt, yt, cosVal, sinVal;
+   q31_t p0, p1;
+
+   //N = fftLen; 
+   n2 = fftLen;
+
+   n1 = n2;
+   n2 = n2 >> 1;
+   ia = 0;
+
+   // loop for groups 
+   for (i = 0; i < n2; i++)
+   {
+      cosVal = pCoef[ia * 2];
+      sinVal = pCoef[(ia * 2) + 1];
+      ia = ia + twidCoefModifier;
+
+      l = i + n2;
+      xt = (pSrc[2 * i] >> 2u) - (pSrc[2 * l] >> 2u);
+      pSrc[2 * i] = ((pSrc[2 * i] >> 2u) + (pSrc[2 * l] >> 2u)) >> 1u;
+      
+      yt = (pSrc[2 * i + 1] >> 2u) - (pSrc[2 * l + 1] >> 2u);
+      pSrc[2 * i + 1] =
+        ((pSrc[2 * l + 1] >> 2u) + (pSrc[2 * i + 1] >> 2u)) >> 1u;
+
+      mult_32x32_keep32_R(p0, xt, cosVal);
+      mult_32x32_keep32_R(p1, yt, cosVal);
+      multAcc_32x32_keep32_R(p0, yt, sinVal); 
+      multSub_32x32_keep32_R(p1, xt, sinVal);
+      
+      pSrc[2u * l] = p0;
+      pSrc[2u * l + 1u] = p1;
+
+   }                             // groups loop end 
+
+   twidCoefModifier <<= 1u;
+
+   // loop for stage 
+   for (k = fftLen / 2; k > 2; k = k >> 1)
+   {
+      n1 = n2;
+      n2 = n2 >> 1;
+      ia = 0;
+
+      // loop for groups 
+      for (j = 0; j < n2; j++)
+      {
+         cosVal = pCoef[ia * 2];
+         sinVal = pCoef[(ia * 2) + 1];
+         ia = ia + twidCoefModifier;
+
+         // loop for butterfly 
+         i = j;
+         m = fftLen / n1;
+         do
+         {
+            l = i + n2;
+            xt = pSrc[2 * i] - pSrc[2 * l];
+            pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) >> 1u;
+            
+            yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+            pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1u;
+
+            mult_32x32_keep32_R(p0, xt, cosVal);
+            mult_32x32_keep32_R(p1, yt, cosVal);
+            multAcc_32x32_keep32_R(p0, yt, sinVal);
+            multSub_32x32_keep32_R(p1, xt, sinVal);
+            
+            pSrc[2u * l] = p0;
+            pSrc[2u * l + 1u] = p1;
+            i += n1;
+            m--;
+         } while( m > 0);                   // butterfly loop end 
+
+      }                           // groups loop end 
+
+      twidCoefModifier <<= 1u;
+   }                             // stages loop end 
+
+   n1 = n2;
+   n2 = n2 >> 1;
+   ia = 0;
+
+   cosVal = pCoef[ia * 2];
+   sinVal = pCoef[(ia * 2) + 1];
+   ia = ia + twidCoefModifier;
+
+   // loop for butterfly 
+   for (i = 0; i < fftLen; i += n1)
+   {
+      l = i + n2;
+      xt = pSrc[2 * i] - pSrc[2 * l];
+      pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
+
+      yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+      pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
+
+      pSrc[2u * l] = xt;
+
+      pSrc[2u * l + 1u] = yt;
+
+      i += n1;
+      l = i + n2;
+
+      xt = pSrc[2 * i] - pSrc[2 * l];
+      pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
+
+      yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+      pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
+
+      pSrc[2u * l] = xt;
+
+      pSrc[2u * l + 1u] = yt;
+
+   }                             // butterfly loop end 
+
+}
+
+
+void arm_radix2_butterfly_inverse_q31(
+q31_t * pSrc,
+uint32_t fftLen,
+q31_t * pCoef,
+uint16_t twidCoefModifier)
+{
+
+   unsigned i, j, k, l;
+   unsigned n1, n2, ia;
+   q31_t xt, yt, cosVal, sinVal;
+   q31_t p0, p1;
+
+   //N = fftLen; 
+   n2 = fftLen;
+
+   n1 = n2;
+   n2 = n2 >> 1;
+   ia = 0;
+
+   // loop for groups 
+   for (i = 0; i < n2; i++)
+   {
+      cosVal = pCoef[ia * 2];
+      sinVal = pCoef[(ia * 2) + 1];
+      ia = ia + twidCoefModifier;
+
+      l = i + n2;
+      xt = (pSrc[2 * i] >> 2u) - (pSrc[2 * l] >> 2u);
+      pSrc[2 * i] = ((pSrc[2 * i] >> 2u) + (pSrc[2 * l] >> 2u)) >> 1u;
+      
+      yt = (pSrc[2 * i + 1] >> 2u) - (pSrc[2 * l + 1] >> 2u);
+      pSrc[2 * i + 1] =
+        ((pSrc[2 * l + 1] >> 2u) + (pSrc[2 * i + 1] >> 2u)) >> 1u;
+
+      mult_32x32_keep32_R(p0, xt, cosVal);
+      mult_32x32_keep32_R(p1, yt, cosVal);
+      multSub_32x32_keep32_R(p0, yt, sinVal);
+      multAcc_32x32_keep32_R(p1, xt, sinVal);
+      
+      pSrc[2u * l] = p0;
+      pSrc[2u * l + 1u] = p1;
+   }                             // groups loop end 
+
+   twidCoefModifier = twidCoefModifier << 1u;
+
+   // loop for stage 
+   for (k = fftLen / 2; k > 2; k = k >> 1)
+   {
+      n1 = n2;
+      n2 = n2 >> 1;
+      ia = 0;
+
+      // loop for groups 
+      for (j = 0; j < n2; j++)
+      {
+         cosVal = pCoef[ia * 2];
+         sinVal = pCoef[(ia * 2) + 1];
+         ia = ia + twidCoefModifier;
+
+         // loop for butterfly 
+         for (i = j; i < fftLen; i += n1)
+         {
+            l = i + n2;
+            xt = pSrc[2 * i] - pSrc[2 * l];
+            pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) >> 1u;
+      
+            yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+            pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1u;
+
+            mult_32x32_keep32_R(p0, xt, cosVal);
+            mult_32x32_keep32_R(p1, yt, cosVal);
+            multSub_32x32_keep32_R(p0, yt, sinVal);
+            multAcc_32x32_keep32_R(p1, xt, sinVal);
+            
+            pSrc[2u * l] = p0;
+            pSrc[2u * l + 1u] = p1;
+         }                         // butterfly loop end 
+
+      }                           // groups loop end 
+
+      twidCoefModifier = twidCoefModifier << 1u;
+   }                             // stages loop end 
+
+   n1 = n2;
+   n2 = n2 >> 1;
+   ia = 0;
+
+   cosVal = pCoef[ia * 2];
+   sinVal = pCoef[(ia * 2) + 1];
+   ia = ia + twidCoefModifier;
+
+   // loop for butterfly 
+   for (i = 0; i < fftLen; i += n1)
+   {
+      l = i + n2;
+      xt = pSrc[2 * i] - pSrc[2 * l];
+      pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
+
+      yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+      pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
+
+      pSrc[2u * l] = xt;
+
+      pSrc[2u * l + 1u] = yt;
+
+      i += n1;
+      l = i + n2;
+
+      xt = pSrc[2 * i] - pSrc[2 * l];
+      pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
+
+      yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
+      pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
+
+      pSrc[2u * l] = xt;
+
+      pSrc[2u * l + 1u] = yt;
+
+   }                             // butterfly loop end 
+
+}
--- a/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_radix4_f32.c
+++ b/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_radix4_f32.c
--- a/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_radix4_init_f32.c
+++ b/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_radix4_init_f32.c
@ -0,0 +1,165 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010-2013 ARM Limited. All rights reserved.    
+*    
+* $Date:        17. January 2013  
+* $Revision: 	V1.4.1  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:	    arm_cfft_radix4_init_f32.c    
+*    
+* Description:	Radix-4 Decimation in Frequency Floating-point CFFT & CIFFT Initialization function    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Redistribution and use in source and binary forms, with or without 
+* modification, are permitted provided that the following conditions
+* are met:
+*   - Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*   - Redistributions in binary form must reproduce the above copyright
+*     notice, this list of conditions and the following disclaimer in
+*     the documentation and/or other materials provided with the 
+*     distribution.
+*   - Neither the name of ARM LIMITED nor the names of its contributors
+*     may be used to endorse or promote products derived from this
+*     software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+* POSSIBILITY OF SUCH DAMAGE.    
+* -------------------------------------------------------------------- */
+
+
+#include "arm_math.h"
+#include "arm_common_tables.h"
+
+/**    
+ * @ingroup groupTransforms    
+ */
+
+/**    
+ * @addtogroup ComplexFFT    
+ * @{    
+ */
+
+/**    
+* @brief  Initialization function for the floating-point CFFT/CIFFT.   
+* @deprecated Do not use this function.  It has been superceded by \ref arm_cfft_f32 and will be removed
+* in the future.
+* @param[in,out] *S             points to an instance of the floating-point CFFT/CIFFT structure.   
+* @param[in]     fftLen         length of the FFT.   
+* @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.   
+* @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.   
+* @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.   
+*    
+* \par Description:   
+* \par    
+* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.    
+* Set(=1) ifftFlag for calculation of CIFFT otherwise  CFFT is calculated   
+* \par    
+* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.    
+* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.    
+* \par    
+* The parameter <code>fftLen</code>	Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.    
+* \par    
+* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.    
+*/
+
+arm_status arm_cfft_radix4_init_f32(
+  arm_cfft_radix4_instance_f32 * S,
+  uint16_t fftLen,
+  uint8_t ifftFlag,
+  uint8_t bitReverseFlag)
+{
+  /*  Initialise the default arm status */
+  arm_status status = ARM_MATH_SUCCESS;
+
+  /*  Initialise the FFT length */
+  S->fftLen = fftLen;
+
+  /*  Initialise the Twiddle coefficient pointer */
+  S->pTwiddle = (float32_t *) twiddleCoef;
+
+  /*  Initialise the Flag for selection of CFFT or CIFFT */
+  S->ifftFlag = ifftFlag;
+
+  /*  Initialise the Flag for calculation Bit reversal or not */
+  S->bitReverseFlag = bitReverseFlag;
+
+  /*  Initializations of structure parameters depending on the FFT length */
+  switch (S->fftLen)
+  {
+
+  case 4096u:
+    /*  Initializations of structure parameters for 4096 point FFT */
+
+    /*  Initialise the twiddle coef modifier value */
+    S->twidCoefModifier = 1u;
+    /*  Initialise the bit reversal table modifier */
+    S->bitRevFactor = 1u;
+    /*  Initialise the bit reversal table pointer */
+    S->pBitRevTable = (uint16_t *) armBitRevTable;
+    /*  Initialise the 1/fftLen Value */
+    S->onebyfftLen = 0.000244140625;
+    break;
+
+  case 1024u:
+    /*  Initializations of structure parameters for 1024 point FFT */
+
+    /*  Initialise the twiddle coef modifier value */
+    S->twidCoefModifier = 4u;
+    /*  Initialise the bit reversal table modifier */
+    S->bitRevFactor = 4u;
+    /*  Initialise the bit reversal table pointer */
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
+    /*  Initialise the 1/fftLen Value */
+    S->onebyfftLen = 0.0009765625f;
+    break;
+
+
+  case 256u:
+    /*  Initializations of structure parameters for 256 point FFT */
+    S->twidCoefModifier = 16u;
+    S->bitRevFactor = 16u;
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
+    S->onebyfftLen = 0.00390625f;
+    break;
+
+  case 64u:
+    /*  Initializations of structure parameters for 64 point FFT */
+    S->twidCoefModifier = 64u;
+    S->bitRevFactor = 64u;
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
+    S->onebyfftLen = 0.015625f;
+    break;
+
+  case 16u:
+    /*  Initializations of structure parameters for 16 point FFT */
+    S->twidCoefModifier = 256u;
+    S->bitRevFactor = 256u;
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
+    S->onebyfftLen = 0.0625f;
+    break;
+
+
+  default:
+    /*  Reporting argument error if fftSize is not valid value */
+    status = ARM_MATH_ARGUMENT_ERROR;
+    break;
+  }
+
+  return (status);
+}
+
+/**    
+ * @} end of ComplexFFT group    
+ */
--- a/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_radix4_init_q15.c
+++ b/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_radix4_init_q15.c
@ -0,0 +1,151 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010-2013 ARM Limited. All rights reserved.    
+*    
+* $Date:        17. January 2013  
+* $Revision: 	V1.4.1  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:	    arm_cfft_radix4_init_q15.c    
+*    
+* Description:	Radix-4 Decimation in Frequency Q15 FFT & IFFT initialization function    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Redistribution and use in source and binary forms, with or without 
+* modification, are permitted provided that the following conditions
+* are met:
+*   - Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*   - Redistributions in binary form must reproduce the above copyright
+*     notice, this list of conditions and the following disclaimer in
+*     the documentation and/or other materials provided with the 
+*     distribution.
+*   - Neither the name of ARM LIMITED nor the names of its contributors
+*     may be used to endorse or promote products derived from this
+*     software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+* POSSIBILITY OF SUCH DAMAGE.    
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+#include "arm_common_tables.h"
+
+/**    
+ * @ingroup groupTransforms    
+ */
+
+
+/**    
+ * @addtogroup ComplexFFT    
+ * @{    
+ */
+
+
+/**    
+* @brief Initialization function for the Q15 CFFT/CIFFT.   
+* @param[in,out] *S             points to an instance of the Q15 CFFT/CIFFT structure.   
+* @param[in]     fftLen         length of the FFT.   
+* @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.   
+* @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.   
+* @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.   
+*    
+* \par Description:   
+* \par    
+* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.    
+* Set(=1) ifftFlag for calculation of CIFFT otherwise  CFFT is calculated   
+* \par    
+* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.    
+* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.    
+* \par    
+* The parameter <code>fftLen</code>	Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.    
+* \par    
+* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.    
+*/
+
+arm_status arm_cfft_radix4_init_q15(
+  arm_cfft_radix4_instance_q15 * S,
+  uint16_t fftLen,
+  uint8_t ifftFlag,
+  uint8_t bitReverseFlag)
+{
+  /*  Initialise the default arm status */
+  arm_status status = ARM_MATH_SUCCESS;
+  /*  Initialise the FFT length */
+  S->fftLen = fftLen;
+  /*  Initialise the Twiddle coefficient pointer */
+  S->pTwiddle = (q15_t *) twiddleCoefQ15;
+  /*  Initialise the Flag for selection of CFFT or CIFFT */
+  S->ifftFlag = ifftFlag;
+  /*  Initialise the Flag for calculation Bit reversal or not */
+  S->bitReverseFlag = bitReverseFlag;
+
+  /*  Initializations of structure parameters depending on the FFT length */
+  switch (S->fftLen)
+  {
+  case 4096u:
+    /*  Initializations of structure parameters for 4096 point FFT */
+
+    /*  Initialise the twiddle coef modifier value */
+    S->twidCoefModifier = 1u;
+    /*  Initialise the bit reversal table modifier */
+    S->bitRevFactor = 1u;
+    /*  Initialise the bit reversal table pointer */
+    S->pBitRevTable = (uint16_t *) armBitRevTable;
+
+    break;
+
+  case 1024u:
+    /*  Initializations of structure parameters for 1024 point FFT */
+    S->twidCoefModifier = 4u;
+    S->bitRevFactor = 4u;
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
+
+    break;
+
+  case 256u:
+    /*  Initializations of structure parameters for 256 point FFT */
+    S->twidCoefModifier = 16u;
+    S->bitRevFactor = 16u;
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
+
+    break;
+
+  case 64u:
+    /*  Initializations of structure parameters for 64 point FFT */
+    S->twidCoefModifier = 64u;
+    S->bitRevFactor = 64u;
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
+
+    break;
+
+  case 16u:
+    /*  Initializations of structure parameters for 16 point FFT */
+    S->twidCoefModifier = 256u;
+    S->bitRevFactor = 256u;
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
+
+    break;
+
+  default:
+    /*  Reporting argument error if fftSize is not valid value */
+    status = ARM_MATH_ARGUMENT_ERROR;
+    break;
+  }
+
+  return (status);
+}
+
+/**    
+ * @} end of ComplexFFT group    
+ */
--- a/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_radix4_init_q31.c
+++ b/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_radix4_init_q31.c
@ -0,0 +1,147 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010-2013 ARM Limited. All rights reserved.    
+*    
+* $Date:        17. January 2013 
+* $Revision: 	V1.4.1   
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:	    arm_cfft_radix4_init_q31.c    
+*    
+* Description:	Radix-4 Decimation in Frequency Q31 FFT & IFFT initialization function    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Redistribution and use in source and binary forms, with or without 
+* modification, are permitted provided that the following conditions
+* are met:
+*   - Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*   - Redistributions in binary form must reproduce the above copyright
+*     notice, this list of conditions and the following disclaimer in
+*     the documentation and/or other materials provided with the 
+*     distribution.
+*   - Neither the name of ARM LIMITED nor the names of its contributors
+*     may be used to endorse or promote products derived from this
+*     software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+* POSSIBILITY OF SUCH DAMAGE.     
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+#include "arm_common_tables.h"
+
+/**    
+ * @ingroup groupTransforms    
+ */
+
+/**    
+ * @addtogroup ComplexFFT    
+ * @{    
+ */
+
+/**    
+*    
+* @brief  Initialization function for the Q31 CFFT/CIFFT.   
+* @param[in,out] *S             points to an instance of the Q31 CFFT/CIFFT structure.   
+* @param[in]     fftLen         length of the FFT.   
+* @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.   
+* @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.   
+* @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.   
+*    
+* \par Description:   
+* \par    
+* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.    
+* Set(=1) ifftFlag for calculation of CIFFT otherwise  CFFT is calculated   
+* \par    
+* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.    
+* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.    
+* \par    
+* The parameter <code>fftLen</code>	Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.    
+* \par    
+* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.    
+*/
+
+arm_status arm_cfft_radix4_init_q31(
+  arm_cfft_radix4_instance_q31 * S,
+  uint16_t fftLen,
+  uint8_t ifftFlag,
+  uint8_t bitReverseFlag)
+{
+  /*  Initialise the default arm status */
+  arm_status status = ARM_MATH_SUCCESS;
+  /*  Initialise the FFT length */
+  S->fftLen = fftLen;
+  /*  Initialise the Twiddle coefficient pointer */
+  S->pTwiddle = (q31_t *) twiddleCoefQ31;
+  /*  Initialise the Flag for selection of CFFT or CIFFT */
+  S->ifftFlag = ifftFlag;
+  /*  Initialise the Flag for calculation Bit reversal or not */
+  S->bitReverseFlag = bitReverseFlag;
+
+  /*  Initializations of Instance structure depending on the FFT length */
+  switch (S->fftLen)
+  {
+    /*  Initializations of structure parameters for 4096 point FFT */
+  case 4096u:
+    /*  Initialise the twiddle coef modifier value */
+    S->twidCoefModifier = 1u;
+    /*  Initialise the bit reversal table modifier */
+    S->bitRevFactor = 1u;
+    /*  Initialise the bit reversal table pointer */
+    S->pBitRevTable = (uint16_t *) armBitRevTable;
+    break;
+
+    /*  Initializations of structure parameters for 1024 point FFT */
+  case 1024u:
+    /*  Initialise the twiddle coef modifier value */
+    S->twidCoefModifier = 4u;
+    /*  Initialise the bit reversal table modifier */
+    S->bitRevFactor = 4u;
+    /*  Initialise the bit reversal table pointer */
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
+    break;
+
+  case 256u:
+    /*  Initializations of structure parameters for 256 point FFT */
+    S->twidCoefModifier = 16u;
+    S->bitRevFactor = 16u;
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
+    break;
+
+  case 64u:
+    /*  Initializations of structure parameters for 64 point FFT */
+    S->twidCoefModifier = 64u;
+    S->bitRevFactor = 64u;
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
+    break;
+
+  case 16u:
+    /*  Initializations of structure parameters for 16 point FFT */
+    S->twidCoefModifier = 256u;
+    S->bitRevFactor = 256u;
+    S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
+    break;
+
+  default:
+    /*  Reporting argument error if fftSize is not valid value */
+    status = ARM_MATH_ARGUMENT_ERROR;
+    break;
+  }
+
+  return (status);
+}
+
+/**    
+ * @} end of ComplexFFT group    
+ */
--- a/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_radix4_q15.c
+++ b/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_radix4_q15.c
--- a/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_radix4_q31.c
+++ b/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_radix4_q31.c
@ -0,0 +1,911 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010-2013 ARM Limited. All rights reserved.    
+*    
+* $Date:        17. January 2013  
+* $Revision: 	V1.4.1  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:	    arm_cfft_radix4_q31.c    
+*    
+* Description:	This file has function definition of Radix-4 FFT & IFFT function and    
+*				In-place bit reversal using bit reversal table    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Redistribution and use in source and binary forms, with or without 
+* modification, are permitted provided that the following conditions
+* are met:
+*   - Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*   - Redistributions in binary form must reproduce the above copyright
+*     notice, this list of conditions and the following disclaimer in
+*     the documentation and/or other materials provided with the 
+*     distribution.
+*   - Neither the name of ARM LIMITED nor the names of its contributors
+*     may be used to endorse or promote products derived from this
+*     software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+* POSSIBILITY OF SUCH DAMAGE.     
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+void arm_radix4_butterfly_inverse_q31(
+q31_t * pSrc,
+uint32_t fftLen,
+q31_t * pCoef,
+uint32_t twidCoefModifier);
+
+void arm_radix4_butterfly_q31(
+q31_t * pSrc,
+uint32_t fftLen,
+q31_t * pCoef,
+uint32_t twidCoefModifier);
+
+void arm_bitreversal_q31(
+q31_t * pSrc,
+uint32_t fftLen,
+uint16_t bitRevFactor,
+uint16_t * pBitRevTab);
+
+/**    
+ * @ingroup groupTransforms    
+ */
+
+/**    
+ * @addtogroup ComplexFFT    
+ * @{    
+ */
+
+/**    
+ * @details    
+ * @brief Processing function for the Q31 CFFT/CIFFT.    
+ * @param[in]      *S    points to an instance of the Q31 CFFT/CIFFT structure.   
+ * @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.   
+ * @return none.    
+ *     
+ * \par Input and output formats:    
+ * \par    
+ * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.   
+ * Hence the output format is different for different FFT sizes.    
+ * The input and output formats for different FFT sizes and number of bits to upscale are mentioned in the tables below for CFFT and CIFFT:   
+ * \par   
+ * \image html CFFTQ31.gif "Input and Output Formats for Q31 CFFT"    
+ * \image html CIFFTQ31.gif "Input and Output Formats for Q31 CIFFT"    
+ *    
+ */
+
+void arm_cfft_radix4_q31(
+  const arm_cfft_radix4_instance_q31 * S,
+  q31_t * pSrc)
+{
+  if(S->ifftFlag == 1u)
+  {
+    /* Complex IFFT radix-4 */
+    arm_radix4_butterfly_inverse_q31(pSrc, S->fftLen, S->pTwiddle,
+                                     S->twidCoefModifier);
+  }
+  else
+  {
+    /* Complex FFT radix-4 */
+    arm_radix4_butterfly_q31(pSrc, S->fftLen, S->pTwiddle,
+                             S->twidCoefModifier);
+  }
+
+
+  if(S->bitReverseFlag == 1u)
+  {
+    /*  Bit Reversal */
+    arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
+  }
+
+}
+
+/**    
+ * @} end of ComplexFFT group    
+ */
+
+/*    
+* Radix-4 FFT algorithm used is :    
+*    
+* Input real and imaginary data:    
+* x(n) = xa + j * ya    
+* x(n+N/4 ) = xb + j * yb    
+* x(n+N/2 ) = xc + j * yc    
+* x(n+3N 4) = xd + j * yd    
+*    
+*    
+* Output real and imaginary data:    
+* x(4r) = xa'+ j * ya'    
+* x(4r+1) = xb'+ j * yb'    
+* x(4r+2) = xc'+ j * yc'    
+* x(4r+3) = xd'+ j * yd'    
+*    
+*    
+* Twiddle factors for radix-4 FFT:    
+* Wn = co1 + j * (- si1)    
+* W2n = co2 + j * (- si2)    
+* W3n = co3 + j * (- si3)    
+*    
+*  Butterfly implementation:    
+* xa' = xa + xb + xc + xd    
+* ya' = ya + yb + yc + yd    
+* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)    
+* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)    
+* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)    
+* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)    
+* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)    
+* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)    
+*    
+*/
+
+/**    
+ * @brief  Core function for the Q31 CFFT butterfly process.   
+ * @param[in, out] *pSrc            points to the in-place buffer of Q31 data type.   
+ * @param[in]      fftLen           length of the FFT.   
+ * @param[in]      *pCoef           points to twiddle coefficient buffer.   
+ * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.   
+ * @return none.   
+ */
+
+void arm_radix4_butterfly_q31(
+  q31_t * pSrc,
+  uint32_t fftLen,
+  q31_t * pCoef,
+  uint32_t twidCoefModifier)
+{
+  uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
+  q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
+
+  q31_t xa, xb, xc, xd;
+  q31_t ya, yb, yc, yd;
+  q31_t xa_out, xb_out, xc_out, xd_out;
+  q31_t ya_out, yb_out, yc_out, yd_out;
+
+  q31_t *ptr1;
+  q63_t xaya, xbyb, xcyc, xdyd;
+  /* Total process is divided into three stages */
+
+  /* process first stage, middle stages, & last stage */
+
+
+  /* start of first stage process */
+
+  /*  Initializations for the first stage */
+  n2 = fftLen;
+  n1 = n2;
+  /* n2 = fftLen/4 */
+  n2 >>= 2u;
+  i0 = 0u;
+  ia1 = 0u;
+
+  j = n2;
+
+  /*  Calculation of first stage */
+  do
+  {
+    /*  index calculation for the input as, */
+    /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
+    i1 = i0 + n2;
+    i2 = i1 + n2;
+    i3 = i2 + n2;
+
+    /* input is in 1.31(q31) format and provide 4 guard bits for the input */
+
+    /*  Butterfly implementation */
+    /* xa + xc */
+    r1 = (pSrc[(2u * i0)] >> 4u) + (pSrc[(2u * i2)] >> 4u);
+    /* xa - xc */
+    r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u);
+
+    /* xb + xd */
+    t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u);
+
+    /* ya + yc */
+    s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u);
+    /* ya - yc */
+    s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u);
+
+    /* xa' = xa + xb + xc + xd */
+    pSrc[2u * i0] = (r1 + t1);
+    /* (xa + xc) - (xb + xd) */
+    r1 = r1 - t1;
+    /* yb + yd */
+    t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u);
+
+    /* ya' = ya + yb + yc + yd */
+    pSrc[(2u * i0) + 1u] = (s1 + t2);
+
+    /* (ya + yc) - (yb + yd) */
+    s1 = s1 - t2;
+
+    /* yb - yd */
+    t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u);
+    /* xb - xd */
+    t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u);
+
+    /*  index calculation for the coefficients */
+    ia2 = 2u * ia1;
+    co2 = pCoef[ia2 * 2u];
+    si2 = pCoef[(ia2 * 2u) + 1u];
+
+    /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
+    pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
+                     ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u;
+
+    /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
+    pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
+                            ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u;
+
+    /* (xa - xc) + (yb - yd) */
+    r1 = r2 + t1;
+    /* (xa - xc) - (yb - yd) */
+    r2 = r2 - t1;
+
+    /* (ya - yc) - (xb - xd) */
+    s1 = s2 - t2;
+    /* (ya - yc) + (xb - xd) */
+    s2 = s2 + t2;
+
+    co1 = pCoef[ia1 * 2u];
+    si1 = pCoef[(ia1 * 2u) + 1u];
+
+    /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
+    pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
+                     ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u;
+
+    /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
+    pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
+                            ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u;
+
+    /*  index calculation for the coefficients */
+    ia3 = 3u * ia1;
+    co3 = pCoef[ia3 * 2u];
+    si3 = pCoef[(ia3 * 2u) + 1u];
+
+    /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
+    pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
+                     ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u;
+
+    /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
+    pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
+                            ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u;
+
+    /*  Twiddle coefficients index modifier */
+    ia1 = ia1 + twidCoefModifier;
+
+    /*  Updating input index */
+    i0 = i0 + 1u;
+
+  } while(--j);
+
+  /* end of first stage process */
+
+  /* data is in 5.27(q27) format */
+
+
+  /* start of Middle stages process */
+
+
+  /* each stage in middle stages provides two down scaling of the input */
+
+  twidCoefModifier <<= 2u;
+
+
+  for (k = fftLen / 4u; k > 4u; k >>= 2u)
+  {
+    /*  Initializations for the first stage */
+    n1 = n2;
+    n2 >>= 2u;
+    ia1 = 0u;
+
+    /*  Calculation of first stage */
+    for (j = 0u; j <= (n2 - 1u); j++)
+    {
+      /*  index calculation for the coefficients */
+      ia2 = ia1 + ia1;
+      ia3 = ia2 + ia1;
+      co1 = pCoef[ia1 * 2u];
+      si1 = pCoef[(ia1 * 2u) + 1u];
+      co2 = pCoef[ia2 * 2u];
+      si2 = pCoef[(ia2 * 2u) + 1u];
+      co3 = pCoef[ia3 * 2u];
+      si3 = pCoef[(ia3 * 2u) + 1u];
+      /*  Twiddle coefficients index modifier */
+      ia1 = ia1 + twidCoefModifier;
+
+      for (i0 = j; i0 < fftLen; i0 += n1)
+      {
+        /*  index calculation for the input as, */
+        /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
+        i1 = i0 + n2;
+        i2 = i1 + n2;
+        i3 = i2 + n2;
+
+        /*  Butterfly implementation */
+        /* xa + xc */
+        r1 = pSrc[2u * i0] + pSrc[2u * i2];
+        /* xa - xc */
+        r2 = pSrc[2u * i0] - pSrc[2u * i2];
+
+        /* ya + yc */
+        s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u];
+        /* ya - yc */
+        s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u];
+
+        /* xb + xd */
+        t1 = pSrc[2u * i1] + pSrc[2u * i3];
+
+        /* xa' = xa + xb + xc + xd */
+        pSrc[2u * i0] = (r1 + t1) >> 2u;
+        /* xa + xc -(xb + xd) */
+        r1 = r1 - t1;
+
+        /* yb + yd */
+        t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u];
+        /* ya' = ya + yb + yc + yd */
+        pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u;
+
+        /* (ya + yc) - (yb + yd) */
+        s1 = s1 - t2;
+
+        /* (yb - yd) */
+        t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u];
+        /* (xb - xd) */
+        t2 = pSrc[2u * i1] - pSrc[2u * i3];
+
+        /* xc' = (xa-xb+xc-xd)co2 + (ya-yb+yc-yd)(si2) */
+        pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) +
+                         ((int32_t) (((q63_t) s1 * si2) >> 32))) >> 1u;
+
+        /* yc' = (ya-yb+yc-yd)co2 - (xa-xb+xc-xd)(si2) */
+        pSrc[(2u * i1) + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) -
+                                ((int32_t) (((q63_t) r1 * si2) >> 32))) >> 1u;
+
+        /* (xa - xc) + (yb - yd) */
+        r1 = r2 + t1;
+        /* (xa - xc) - (yb - yd) */
+        r2 = r2 - t1;
+
+        /* (ya - yc) -  (xb - xd) */
+        s1 = s2 - t2;
+        /* (ya - yc) +  (xb - xd) */
+        s2 = s2 + t2;
+
+        /* xb' = (xa+yb-xc-yd)co1 + (ya-xb-yc+xd)(si1) */
+        pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) +
+                         ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u;
+
+        /* yb' = (ya-xb-yc+xd)co1 - (xa+yb-xc-yd)(si1) */
+        pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) -
+                                ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u;
+
+        /* xd' = (xa-yb-xc+yd)co3 + (ya+xb-yc-xd)(si3) */
+        pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) +
+                         ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u;
+
+        /* yd' = (ya+xb-yc-xd)co3 - (xa-yb-xc+yd)(si3) */
+        pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) -
+                                ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u;
+      }
+    }
+    twidCoefModifier <<= 2u;
+  }
+
+  /* End of Middle stages process */
+
+  /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
+  /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
+  /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
+  /* data is in 5.27(q27) format for the 16 point as there are no middle stages */
+
+
+  /* start of Last stage process */
+  /*  Initializations for the last stage */
+  j = fftLen >> 2;
+  ptr1 = &pSrc[0];
+
+  /*  Calculations of last stage */
+  do
+  {
+
+#ifndef ARM_MATH_BIG_ENDIAN
+
+    /* Read xa (real), ya(imag) input */
+    xaya = *__SIMD64(ptr1)++;
+    xa = (q31_t) xaya;
+    ya = (q31_t) (xaya >> 32);
+
+    /* Read xb (real), yb(imag) input */
+    xbyb = *__SIMD64(ptr1)++;
+    xb = (q31_t) xbyb;
+    yb = (q31_t) (xbyb >> 32);
+
+    /* Read xc (real), yc(imag) input */
+    xcyc = *__SIMD64(ptr1)++;
+    xc = (q31_t) xcyc;
+    yc = (q31_t) (xcyc >> 32);
+
+    /* Read xc (real), yc(imag) input */
+    xdyd = *__SIMD64(ptr1)++;
+    xd = (q31_t) xdyd;
+    yd = (q31_t) (xdyd >> 32);
+
+#else
+
+    /* Read xa (real), ya(imag) input */
+    xaya = *__SIMD64(ptr1)++;
+    ya = (q31_t) xaya;
+    xa = (q31_t) (xaya >> 32);
+
+    /* Read xb (real), yb(imag) input */
+    xbyb = *__SIMD64(ptr1)++;
+    yb = (q31_t) xbyb;
+    xb = (q31_t) (xbyb >> 32);
+
+    /* Read xc (real), yc(imag) input */
+    xcyc = *__SIMD64(ptr1)++;
+    yc = (q31_t) xcyc;
+    xc = (q31_t) (xcyc >> 32);
+
+    /* Read xc (real), yc(imag) input */
+    xdyd = *__SIMD64(ptr1)++;
+    yd = (q31_t) xdyd;
+    xd = (q31_t) (xdyd >> 32);
+
+
+#endif
+
+    /* xa' = xa + xb + xc + xd */
+    xa_out = xa + xb + xc + xd;
+
+    /* ya' = ya + yb + yc + yd */
+    ya_out = ya + yb + yc + yd;
+
+    /* pointer updation for writing */
+    ptr1 = ptr1 - 8u;
+
+    /* writing xa' and ya' */
+    *ptr1++ = xa_out;
+    *ptr1++ = ya_out;
+
+    xc_out = (xa - xb + xc - xd);
+    yc_out = (ya - yb + yc - yd);
+
+    /* writing xc' and yc' */
+    *ptr1++ = xc_out;
+    *ptr1++ = yc_out;
+
+    xb_out = (xa + yb - xc - yd);
+    yb_out = (ya - xb - yc + xd);
+
+    /* writing xb' and yb' */
+    *ptr1++ = xb_out;
+    *ptr1++ = yb_out;
+
+    xd_out = (xa - yb - xc + yd);
+    yd_out = (ya + xb - yc - xd);
+
+    /* writing xd' and yd' */
+    *ptr1++ = xd_out;
+    *ptr1++ = yd_out;
+
+
+  } while(--j);
+
+  /* output is in 11.21(q21) format for the 1024 point */
+  /* output is in 9.23(q23) format for the 256 point */
+  /* output is in 7.25(q25) format for the 64 point */
+  /* output is in 5.27(q27) format for the 16 point */
+
+  /* End of last stage process */
+
+}
+
+
+/**    
+ * @brief  Core function for the Q31 CIFFT butterfly process.   
+ * @param[in, out] *pSrc            points to the in-place buffer of Q31 data type.   
+ * @param[in]      fftLen           length of the FFT.   
+ * @param[in]      *pCoef           points to twiddle coefficient buffer.   
+ * @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.   
+ * @return none.   
+ */
+
+
+/*    
+* Radix-4 IFFT algorithm used is :    
+*    
+* CIFFT uses same twiddle coefficients as CFFT Function    
+*  x[k] = x[n] + (j)k * x[n + fftLen/4] + (-1)k * x[n+fftLen/2] + (-j)k * x[n+3*fftLen/4]    
+*    
+*    
+* IFFT is implemented with following changes in equations from FFT    
+*    
+* Input real and imaginary data:    
+* x(n) = xa + j * ya    
+* x(n+N/4 ) = xb + j * yb    
+* x(n+N/2 ) = xc + j * yc    
+* x(n+3N 4) = xd + j * yd    
+*    
+*    
+* Output real and imaginary data:    
+* x(4r) = xa'+ j * ya'    
+* x(4r+1) = xb'+ j * yb'    
+* x(4r+2) = xc'+ j * yc'    
+* x(4r+3) = xd'+ j * yd'    
+*    
+*    
+* Twiddle factors for radix-4 IFFT:    
+* Wn = co1 + j * (si1)    
+* W2n = co2 + j * (si2)    
+* W3n = co3 + j * (si3)    
+    
+* The real and imaginary output values for the radix-4 butterfly are    
+* xa' = xa + xb + xc + xd    
+* ya' = ya + yb + yc + yd    
+* xb' = (xa-yb-xc+yd)* co1 - (ya+xb-yc-xd)* (si1)    
+* yb' = (ya+xb-yc-xd)* co1 + (xa-yb-xc+yd)* (si1)    
+* xc' = (xa-xb+xc-xd)* co2 - (ya-yb+yc-yd)* (si2)    
+* yc' = (ya-yb+yc-yd)* co2 + (xa-xb+xc-xd)* (si2)    
+* xd' = (xa+yb-xc-yd)* co3 - (ya-xb-yc+xd)* (si3)    
+* yd' = (ya-xb-yc+xd)* co3 + (xa+yb-xc-yd)* (si3)    
+*    
+*/
+
+void arm_radix4_butterfly_inverse_q31(
+  q31_t * pSrc,
+  uint32_t fftLen,
+  q31_t * pCoef,
+  uint32_t twidCoefModifier)
+{
+  uint32_t n1, n2, ia1, ia2, ia3, i0, i1, i2, i3, j, k;
+  q31_t t1, t2, r1, r2, s1, s2, co1, co2, co3, si1, si2, si3;
+  q31_t xa, xb, xc, xd;
+  q31_t ya, yb, yc, yd;
+  q31_t xa_out, xb_out, xc_out, xd_out;
+  q31_t ya_out, yb_out, yc_out, yd_out;
+
+  q31_t *ptr1;
+  q63_t xaya, xbyb, xcyc, xdyd;
+
+  /* input is be 1.31(q31) format for all FFT sizes */
+  /* Total process is divided into three stages */
+  /* process first stage, middle stages, & last stage */
+
+  /* Start of first stage process */
+
+  /* Initializations for the first stage */
+  n2 = fftLen;
+  n1 = n2;
+  /* n2 = fftLen/4 */
+  n2 >>= 2u;
+  i0 = 0u;
+  ia1 = 0u;
+
+  j = n2;
+
+  do
+  {
+
+    /* input is in 1.31(q31) format and provide 4 guard bits for the input */
+
+    /*  index calculation for the input as, */
+    /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
+    i1 = i0 + n2;
+    i2 = i1 + n2;
+    i3 = i2 + n2;
+
+    /*  Butterfly implementation */
+    /* xa + xc */
+    r1 = (pSrc[2u * i0] >> 4u) + (pSrc[2u * i2] >> 4u);
+    /* xa - xc */
+    r2 = (pSrc[2u * i0] >> 4u) - (pSrc[2u * i2] >> 4u);
+
+    /* xb + xd */
+    t1 = (pSrc[2u * i1] >> 4u) + (pSrc[2u * i3] >> 4u);
+
+    /* ya + yc */
+    s1 = (pSrc[(2u * i0) + 1u] >> 4u) + (pSrc[(2u * i2) + 1u] >> 4u);
+    /* ya - yc */
+    s2 = (pSrc[(2u * i0) + 1u] >> 4u) - (pSrc[(2u * i2) + 1u] >> 4u);
+
+    /* xa' = xa + xb + xc + xd */
+    pSrc[2u * i0] = (r1 + t1);
+    /* (xa + xc) - (xb + xd) */
+    r1 = r1 - t1;
+    /* yb + yd */
+    t2 = (pSrc[(2u * i1) + 1u] >> 4u) + (pSrc[(2u * i3) + 1u] >> 4u);
+    /* ya' = ya + yb + yc + yd */
+    pSrc[(2u * i0) + 1u] = (s1 + t2);
+
+    /* (ya + yc) - (yb + yd) */
+    s1 = s1 - t2;
+
+    /* yb - yd */
+    t1 = (pSrc[(2u * i1) + 1u] >> 4u) - (pSrc[(2u * i3) + 1u] >> 4u);
+    /* xb - xd */
+    t2 = (pSrc[2u * i1] >> 4u) - (pSrc[2u * i3] >> 4u);
+
+    /*  index calculation for the coefficients */
+    ia2 = 2u * ia1;
+    co2 = pCoef[ia2 * 2u];
+    si2 = pCoef[(ia2 * 2u) + 1u];
+
+    /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
+    pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32)) -
+                     ((int32_t) (((q63_t) s1 * si2) >> 32))) << 1u;
+
+    /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
+    pSrc[2u * i1 + 1u] = (((int32_t) (((q63_t) s1 * co2) >> 32)) +
+                          ((int32_t) (((q63_t) r1 * si2) >> 32))) << 1u;
+
+    /* (xa - xc) - (yb - yd) */
+    r1 = r2 - t1;
+    /* (xa - xc) + (yb - yd) */
+    r2 = r2 + t1;
+
+    /* (ya - yc) + (xb - xd) */
+    s1 = s2 + t2;
+    /* (ya - yc) - (xb - xd) */
+    s2 = s2 - t2;
+
+    co1 = pCoef[ia1 * 2u];
+    si1 = pCoef[(ia1 * 2u) + 1u];
+
+    /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
+    pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
+                     ((int32_t) (((q63_t) s1 * si1) >> 32))) << 1u;
+
+    /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
+    pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
+                            ((int32_t) (((q63_t) r1 * si1) >> 32))) << 1u;
+
+    /*  index calculation for the coefficients */
+    ia3 = 3u * ia1;
+    co3 = pCoef[ia3 * 2u];
+    si3 = pCoef[(ia3 * 2u) + 1u];
+
+    /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
+    pSrc[2u * i3] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
+                     ((int32_t) (((q63_t) s2 * si3) >> 32))) << 1u;
+
+    /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
+    pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
+                            ((int32_t) (((q63_t) r2 * si3) >> 32))) << 1u;
+
+    /*  Twiddle coefficients index modifier */
+    ia1 = ia1 + twidCoefModifier;
+
+    /*  Updating input index */
+    i0 = i0 + 1u;
+
+  } while(--j);
+
+  /* data is in 5.27(q27) format */
+  /* each stage provides two down scaling of the input */
+
+
+  /* Start of Middle stages process */
+
+  twidCoefModifier <<= 2u;
+
+  /*  Calculation of second stage to excluding last stage */
+  for (k = fftLen / 4u; k > 4u; k >>= 2u)
+  {
+    /*  Initializations for the first stage */
+    n1 = n2;
+    n2 >>= 2u;
+    ia1 = 0u;
+
+    for (j = 0; j <= (n2 - 1u); j++)
+    {
+      /*  index calculation for the coefficients */
+      ia2 = ia1 + ia1;
+      ia3 = ia2 + ia1;
+      co1 = pCoef[ia1 * 2u];
+      si1 = pCoef[(ia1 * 2u) + 1u];
+      co2 = pCoef[ia2 * 2u];
+      si2 = pCoef[(ia2 * 2u) + 1u];
+      co3 = pCoef[ia3 * 2u];
+      si3 = pCoef[(ia3 * 2u) + 1u];
+      /*  Twiddle coefficients index modifier */
+      ia1 = ia1 + twidCoefModifier;
+
+      for (i0 = j; i0 < fftLen; i0 += n1)
+      {
+        /*  index calculation for the input as, */
+        /*  pSrc[i0 + 0], pSrc[i0 + fftLen/4], pSrc[i0 + fftLen/2u], pSrc[i0 + 3fftLen/4] */
+        i1 = i0 + n2;
+        i2 = i1 + n2;
+        i3 = i2 + n2;
+
+        /*  Butterfly implementation */
+        /* xa + xc */
+        r1 = pSrc[2u * i0] + pSrc[2u * i2];
+        /* xa - xc */
+        r2 = pSrc[2u * i0] - pSrc[2u * i2];
+
+        /* ya + yc */
+        s1 = pSrc[(2u * i0) + 1u] + pSrc[(2u * i2) + 1u];
+        /* ya - yc */
+        s2 = pSrc[(2u * i0) + 1u] - pSrc[(2u * i2) + 1u];
+
+        /* xb + xd */
+        t1 = pSrc[2u * i1] + pSrc[2u * i3];
+
+        /* xa' = xa + xb + xc + xd */
+        pSrc[2u * i0] = (r1 + t1) >> 2u;
+        /* xa + xc -(xb + xd) */
+        r1 = r1 - t1;
+        /* yb + yd */
+        t2 = pSrc[(2u * i1) + 1u] + pSrc[(2u * i3) + 1u];
+        /* ya' = ya + yb + yc + yd */
+        pSrc[(2u * i0) + 1u] = (s1 + t2) >> 2u;
+
+        /* (ya + yc) - (yb + yd) */
+        s1 = s1 - t2;
+
+        /* (yb - yd) */
+        t1 = pSrc[(2u * i1) + 1u] - pSrc[(2u * i3) + 1u];
+        /* (xb - xd) */
+        t2 = pSrc[2u * i1] - pSrc[2u * i3];
+
+        /* xc' = (xa-xb+xc-xd)co2 - (ya-yb+yc-yd)(si2) */
+        pSrc[2u * i1] = (((int32_t) (((q63_t) r1 * co2) >> 32u)) -
+                         ((int32_t) (((q63_t) s1 * si2) >> 32u))) >> 1u;
+
+        /* yc' = (ya-yb+yc-yd)co2 + (xa-xb+xc-xd)(si2) */
+        pSrc[(2u * i1) + 1u] =
+          (((int32_t) (((q63_t) s1 * co2) >> 32u)) +
+           ((int32_t) (((q63_t) r1 * si2) >> 32u))) >> 1u;
+
+        /* (xa - xc) - (yb - yd) */
+        r1 = r2 - t1;
+        /* (xa - xc) + (yb - yd) */
+        r2 = r2 + t1;
+
+        /* (ya - yc) +  (xb - xd) */
+        s1 = s2 + t2;
+        /* (ya - yc) -  (xb - xd) */
+        s2 = s2 - t2;
+
+        /* xb' = (xa+yb-xc-yd)co1 - (ya-xb-yc+xd)(si1) */
+        pSrc[2u * i2] = (((int32_t) (((q63_t) r1 * co1) >> 32)) -
+                         ((int32_t) (((q63_t) s1 * si1) >> 32))) >> 1u;
+
+        /* yb' = (ya-xb-yc+xd)co1 + (xa+yb-xc-yd)(si1) */
+        pSrc[(2u * i2) + 1u] = (((int32_t) (((q63_t) s1 * co1) >> 32)) +
+                                ((int32_t) (((q63_t) r1 * si1) >> 32))) >> 1u;
+
+        /* xd' = (xa-yb-xc+yd)co3 - (ya+xb-yc-xd)(si3) */
+        pSrc[(2u * i3)] = (((int32_t) (((q63_t) r2 * co3) >> 32)) -
+                           ((int32_t) (((q63_t) s2 * si3) >> 32))) >> 1u;
+
+        /* yd' = (ya+xb-yc-xd)co3 + (xa-yb-xc+yd)(si3) */
+        pSrc[(2u * i3) + 1u] = (((int32_t) (((q63_t) s2 * co3) >> 32)) +
+                                ((int32_t) (((q63_t) r2 * si3) >> 32))) >> 1u;
+      }
+    }
+    twidCoefModifier <<= 2u;
+  }
+
+  /* End of Middle stages process */
+
+  /* data is in 11.21(q21) format for the 1024 point as there are 3 middle stages */
+  /* data is in 9.23(q23) format for the 256 point as there are 2 middle stages */
+  /* data is in 7.25(q25) format for the 64 point as there are 1 middle stage */
+  /* data is in 5.27(q27) format for the 16 point as there are no middle stages */
+
+
+  /* Start of last stage process */
+
+
+  /*  Initializations for the last stage */
+  j = fftLen >> 2;
+  ptr1 = &pSrc[0];
+
+  /*  Calculations of last stage */
+  do
+  {
+#ifndef ARM_MATH_BIG_ENDIAN
+    /* Read xa (real), ya(imag) input */
+    xaya = *__SIMD64(ptr1)++;
+    xa = (q31_t) xaya;
+    ya = (q31_t) (xaya >> 32);
+
+    /* Read xb (real), yb(imag) input */
+    xbyb = *__SIMD64(ptr1)++;
+    xb = (q31_t) xbyb;
+    yb = (q31_t) (xbyb >> 32);
+
+    /* Read xc (real), yc(imag) input */
+    xcyc = *__SIMD64(ptr1)++;
+    xc = (q31_t) xcyc;
+    yc = (q31_t) (xcyc >> 32);
+
+    /* Read xc (real), yc(imag) input */
+    xdyd = *__SIMD64(ptr1)++;
+    xd = (q31_t) xdyd;
+    yd = (q31_t) (xdyd >> 32);
+
+#else
+
+    /* Read xa (real), ya(imag) input */
+    xaya = *__SIMD64(ptr1)++;
+    ya = (q31_t) xaya;
+    xa = (q31_t) (xaya >> 32);
+
+    /* Read xb (real), yb(imag) input */
+    xbyb = *__SIMD64(ptr1)++;
+    yb = (q31_t) xbyb;
+    xb = (q31_t) (xbyb >> 32);
+
+    /* Read xc (real), yc(imag) input */
+    xcyc = *__SIMD64(ptr1)++;
+    yc = (q31_t) xcyc;
+    xc = (q31_t) (xcyc >> 32);
+
+    /* Read xc (real), yc(imag) input */
+    xdyd = *__SIMD64(ptr1)++;
+    yd = (q31_t) xdyd;
+    xd = (q31_t) (xdyd >> 32);
+
+
+#endif
+
+    /* xa' = xa + xb + xc + xd */
+    xa_out = xa + xb + xc + xd;
+
+    /* ya' = ya + yb + yc + yd */
+    ya_out = ya + yb + yc + yd;
+
+    /* pointer updation for writing */
+    ptr1 = ptr1 - 8u;
+
+    /* writing xa' and ya' */
+    *ptr1++ = xa_out;
+    *ptr1++ = ya_out;
+
+    xc_out = (xa - xb + xc - xd);
+    yc_out = (ya - yb + yc - yd);
+
+    /* writing xc' and yc' */
+    *ptr1++ = xc_out;
+    *ptr1++ = yc_out;
+
+    xb_out = (xa - yb - xc + yd);
+    yb_out = (ya + xb - yc - xd);
+
+    /* writing xb' and yb' */
+    *ptr1++ = xb_out;
+    *ptr1++ = yb_out;
+
+    xd_out = (xa + yb - xc - yd);
+    yd_out = (ya - xb - yc + xd);
+
+    /* writing xd' and yd' */
+    *ptr1++ = xd_out;
+    *ptr1++ = yd_out;
+
+
+  } while(--j);
+
+  /* output is in 11.21(q21) format for the 1024 point */
+  /* output is in 9.23(q23) format for the 256 point */
+  /* output is in 7.25(q25) format for the 64 point */
+  /* output is in 5.27(q27) format for the 16 point */
+
+  /* End of last stage process */
+}
--- a/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_radix8_f32.c
+++ b/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_cfft_radix8_f32.c
@ -0,0 +1,384 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010-2013 ARM Limited. All rights reserved.    
+*    
+* $Date:        17. January 2013  
+* $Revision: 	V1.4.1  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:	    arm_cfft_radix8_f32.c    
+*    
+* Description:	Radix-8 Decimation in Frequency CFFT & CIFFT Floating point processing function        
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Redistribution and use in source and binary forms, with or without 
+* modification, are permitted provided that the following conditions
+* are met:
+*   - Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*   - Redistributions in binary form must reproduce the above copyright
+*     notice, this list of conditions and the following disclaimer in
+*     the documentation and/or other materials provided with the 
+*     distribution.
+*   - Neither the name of ARM LIMITED nor the names of its contributors
+*     may be used to endorse or promote products derived from this
+*     software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+* POSSIBILITY OF SUCH DAMAGE.      
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+* @ingroup groupTransforms    
+*/
+
+/**    
+* @defgroup Radix8_CFFT_CIFFT Radix-8 Complex FFT Functions    
+*    
+* \par    
+* Complex Fast Fourier Transform(CFFT) and Complex Inverse Fast Fourier Transform(CIFFT) is an efficient algorithm to compute Discrete Fourier Transform(DFT) and Inverse Discrete Fourier Transform(IDFT).    
+* Computational complexity of CFFT reduces drastically when compared to DFT.    
+* \par    
+* This set of functions implements CFFT/CIFFT    
+* for floating-point data types.  The functions operates on in-place buffer which uses same buffer for input and output.    
+* Complex input is stored in input buffer in an interleaved fashion.    
+*    
+* \par    
+* The functions operate on blocks of input and output data and each call to the function processes    
+* <code>2*fftLen</code> samples through the transform.  <code>pSrc</code>  points to In-place arrays containing <code>2*fftLen</code> values.    
+* \par   
+* The <code>pSrc</code> points to the array of in-place buffer of size <code>2*fftLen</code> and inputs and outputs are stored in an interleaved fashion as shown below.    
+* <pre> {real[0], imag[0], real[1], imag[1],..} </pre>    
+*    
+* \par Lengths supported by the transform:   
+* \par    
+* Internally, the function utilize a Radix-8 decimation in frequency(DIF) algorithm    
+* and the size of the FFT supported are of the lengths [ 64, 512, 4096].   
+*     
+*    
+* \par Algorithm:    
+*    
+* <b>Complex Fast Fourier Transform:</b>    
+* \par     
+* Input real and imaginary data:    
+* <pre>    
+* x(n) = xa + j * ya    
+* x(n+N/4 ) = xb + j * yb    
+* x(n+N/2 ) = xc + j * yc    
+* x(n+3N 4) = xd + j * yd    
+* </pre>    
+* where N is length of FFT    
+* \par    
+* Output real and imaginary data:    
+* <pre>    
+* X(4r) = xa'+ j * ya'    
+* X(4r+1) = xb'+ j * yb'    
+* X(4r+2) = xc'+ j * yc'    
+* X(4r+3) = xd'+ j * yd'    
+* </pre>    
+* \par    
+* Twiddle factors for Radix-8 FFT:    
+* <pre>    
+* Wn = co1 + j * (- si1)    
+* W2n = co2 + j * (- si2)    
+* W3n = co3 + j * (- si3)    
+* </pre>    
+*    
+* \par    
+* \image html CFFT.gif "Radix-8 Decimation-in Frequency Complex Fast Fourier Transform"    
+*    
+* \par    
+* Output from Radix-8 CFFT Results in Digit reversal order. Interchange middle two branches of every butterfly results in Bit reversed output.    
+* \par    
+* <b> Butterfly CFFT equations:</b>    
+* <pre>    
+* xa' = xa + xb + xc + xd    
+* ya' = ya + yb + yc + yd    
+* xc' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1)    
+* yc' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1)    
+* xb' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2)    
+* yb' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2)    
+* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3)    
+* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3)    
+* </pre>    
+*    
+* \par    
+* where <code>fftLen</code> length of CFFT/CIFFT; <code>ifftFlag</code> Flag for selection of CFFT or CIFFT(Set ifftFlag to calculate CIFFT otherwise calculates CFFT);    
+* <code>bitReverseFlag</code> Flag for selection of output order(Set bitReverseFlag to output in normal order otherwise output in bit reversed order);     
+* <code>pTwiddle</code>points to array of twiddle coefficients; <code>pBitRevTable</code> points to the array of bit reversal table.    
+* <code>twidCoefModifier</code> modifier for twiddle factor table which supports all FFT lengths with same table;     
+* <code>pBitRevTable</code> modifier for bit reversal table which supports all FFT lengths with same table.    
+* <code>onebyfftLen</code> value of 1/fftLen to calculate CIFFT;    
+*   
+* \par Fixed-Point Behavior    
+* Care must be taken when using the fixed-point versions of the CFFT/CIFFT function.    
+* Refer to the function specific documentation below for usage guidelines.    
+*/
+
+
+/*    
+* @brief  Core function for the floating-point CFFT butterfly process.   
+* @param[in, out] *pSrc            points to the in-place buffer of floating-point data type.   
+* @param[in]      fftLen           length of the FFT.   
+* @param[in]      *pCoef           points to the twiddle coefficient buffer.   
+* @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.   
+* @return none.   
+*/
+
+void arm_radix8_butterfly_f32(
+float32_t * pSrc,
+uint16_t fftLen,
+const float32_t * pCoef,
+uint16_t twidCoefModifier)
+{
+   uint32_t ia1, ia2, ia3, ia4, ia5, ia6, ia7;
+   uint32_t i1, i2, i3, i4, i5, i6, i7, i8;
+   uint32_t id;
+   uint32_t n1, n2, j;
+   
+   float32_t r1, r2, r3, r4, r5, r6, r7, r8;
+   float32_t t1, t2;
+   float32_t s1, s2, s3, s4, s5, s6, s7, s8;
+   float32_t p1, p2, p3, p4;
+   float32_t co2, co3, co4, co5, co6, co7, co8;
+   float32_t si2, si3, si4, si5, si6, si7, si8;
+   const float32_t C81 = 0.70710678118f;
+
+   n2 = fftLen;
+   
+   do 
+   {
+      n1 = n2;
+      n2 = n2 >> 3;
+      i1 = 0;
+      
+      do
+      {
+         i2 = i1 + n2;
+         i3 = i2 + n2;
+         i4 = i3 + n2;
+         i5 = i4 + n2;
+         i6 = i5 + n2;
+         i7 = i6 + n2;
+         i8 = i7 + n2;
+         r1 = pSrc[2 * i1] + pSrc[2 * i5];
+         r5 = pSrc[2 * i1] - pSrc[2 * i5];
+         r2 = pSrc[2 * i2] + pSrc[2 * i6];
+         r6 = pSrc[2 * i2] - pSrc[2 * i6];
+         r3 = pSrc[2 * i3] + pSrc[2 * i7];
+         r7 = pSrc[2 * i3] - pSrc[2 * i7];
+         r4 = pSrc[2 * i4] + pSrc[2 * i8];
+         r8 = pSrc[2 * i4] - pSrc[2 * i8];
+         t1 = r1 - r3;
+         r1 = r1 + r3;
+         r3 = r2 - r4;
+         r2 = r2 + r4;
+         pSrc[2 * i1] = r1 + r2;   
+         pSrc[2 * i5] = r1 - r2;
+         r1 = pSrc[2 * i1 + 1] + pSrc[2 * i5 + 1];
+         s5 = pSrc[2 * i1 + 1] - pSrc[2 * i5 + 1];
+         r2 = pSrc[2 * i2 + 1] + pSrc[2 * i6 + 1];
+         s6 = pSrc[2 * i2 + 1] - pSrc[2 * i6 + 1];
+         s3 = pSrc[2 * i3 + 1] + pSrc[2 * i7 + 1];
+         s7 = pSrc[2 * i3 + 1] - pSrc[2 * i7 + 1];
+         r4 = pSrc[2 * i4 + 1] + pSrc[2 * i8 + 1];
+         s8 = pSrc[2 * i4 + 1] - pSrc[2 * i8 + 1];
+         t2 = r1 - s3;
+         r1 = r1 + s3;
+         s3 = r2 - r4;
+         r2 = r2 + r4;
+         pSrc[2 * i1 + 1] = r1 + r2;
+         pSrc[2 * i5 + 1] = r1 - r2;
+         pSrc[2 * i3]     = t1 + s3;
+         pSrc[2 * i7]     = t1 - s3;
+         pSrc[2 * i3 + 1] = t2 - r3;
+         pSrc[2 * i7 + 1] = t2 + r3;
+         r1 = (r6 - r8) * C81;
+         r6 = (r6 + r8) * C81;
+         r2 = (s6 - s8) * C81;
+         s6 = (s6 + s8) * C81;
+         t1 = r5 - r1;
+         r5 = r5 + r1;
+         r8 = r7 - r6;
+         r7 = r7 + r6;
+         t2 = s5 - r2;
+         s5 = s5 + r2;
+         s8 = s7 - s6;
+         s7 = s7 + s6;
+         pSrc[2 * i2]     = r5 + s7;
+         pSrc[2 * i8]     = r5 - s7;
+         pSrc[2 * i6]     = t1 + s8;
+         pSrc[2 * i4]     = t1 - s8;
+         pSrc[2 * i2 + 1] = s5 - r7;
+         pSrc[2 * i8 + 1] = s5 + r7;
+         pSrc[2 * i6 + 1] = t2 - r8;
+         pSrc[2 * i4 + 1] = t2 + r8;
+         
+         i1 += n1;
+      } while(i1 < fftLen);
+      
+      if(n2 < 8)
+         break;
+      
+      ia1 = 0;
+      j = 1;
+      
+      do
+      {      
+         /*  index calculation for the coefficients */
+         id  = ia1 + twidCoefModifier;
+         ia1 = id;
+         ia2 = ia1 + id;
+         ia3 = ia2 + id;
+         ia4 = ia3 + id;
+         ia5 = ia4 + id;
+         ia6 = ia5 + id;
+         ia7 = ia6 + id;
+                  
+         co2 = pCoef[2 * ia1];
+         co3 = pCoef[2 * ia2];
+         co4 = pCoef[2 * ia3];
+         co5 = pCoef[2 * ia4];
+         co6 = pCoef[2 * ia5];
+         co7 = pCoef[2 * ia6];
+         co8 = pCoef[2 * ia7];
+         si2 = pCoef[2 * ia1 + 1];
+         si3 = pCoef[2 * ia2 + 1];
+         si4 = pCoef[2 * ia3 + 1];
+         si5 = pCoef[2 * ia4 + 1];
+         si6 = pCoef[2 * ia5 + 1];
+         si7 = pCoef[2 * ia6 + 1];
+         si8 = pCoef[2 * ia7 + 1];         
+         
+         i1 = j;
+         
+         do
+         {
+            /*  index calculation for the input */
+            i2 = i1 + n2;
+            i3 = i2 + n2;
+            i4 = i3 + n2;
+            i5 = i4 + n2;
+            i6 = i5 + n2;
+            i7 = i6 + n2;
+            i8 = i7 + n2;
+            r1 = pSrc[2 * i1] + pSrc[2 * i5];
+            r5 = pSrc[2 * i1] - pSrc[2 * i5];
+            r2 = pSrc[2 * i2] + pSrc[2 * i6];
+            r6 = pSrc[2 * i2] - pSrc[2 * i6];
+            r3 = pSrc[2 * i3] + pSrc[2 * i7];
+            r7 = pSrc[2 * i3] - pSrc[2 * i7];
+            r4 = pSrc[2 * i4] + pSrc[2 * i8];
+            r8 = pSrc[2 * i4] - pSrc[2 * i8];
+            t1 = r1 - r3;
+            r1 = r1 + r3;
+            r3 = r2 - r4;
+            r2 = r2 + r4;
+            pSrc[2 * i1] = r1 + r2;
+            r2 = r1 - r2;
+            s1 = pSrc[2 * i1 + 1] + pSrc[2 * i5 + 1];
+            s5 = pSrc[2 * i1 + 1] - pSrc[2 * i5 + 1];
+            s2 = pSrc[2 * i2 + 1] + pSrc[2 * i6 + 1];
+            s6 = pSrc[2 * i2 + 1] - pSrc[2 * i6 + 1];
+            s3 = pSrc[2 * i3 + 1] + pSrc[2 * i7 + 1];
+            s7 = pSrc[2 * i3 + 1] - pSrc[2 * i7 + 1];
+            s4 = pSrc[2 * i4 + 1] + pSrc[2 * i8 + 1];
+            s8 = pSrc[2 * i4 + 1] - pSrc[2 * i8 + 1];
+            t2 = s1 - s3;
+            s1 = s1 + s3;
+            s3 = s2 - s4;
+            s2 = s2 + s4;
+            r1 = t1 + s3;
+            t1 = t1 - s3;
+            pSrc[2 * i1 + 1] = s1 + s2;
+            s2 = s1 - s2;
+            s1 = t2 - r3;
+            t2 = t2 + r3;
+            p1 = co5 * r2;
+            p2 = si5 * s2;
+            p3 = co5 * s2;
+            p4 = si5 * r2;
+            pSrc[2 * i5]     = p1 + p2;
+            pSrc[2 * i5 + 1] = p3 - p4;
+            p1 = co3 * r1;
+            p2 = si3 * s1;
+            p3 = co3 * s1;
+            p4 = si3 * r1;
+            pSrc[2 * i3]     = p1 + p2;
+            pSrc[2 * i3 + 1] = p3 - p4;
+            p1 = co7 * t1;
+            p2 = si7 * t2;
+            p3 = co7 * t2;
+            p4 = si7 * t1;
+            pSrc[2 * i7]     = p1 + p2;
+            pSrc[2 * i7 + 1] = p3 - p4;
+            r1 = (r6 - r8) * C81;
+            r6 = (r6 + r8) * C81;
+            s1 = (s6 - s8) * C81;
+            s6 = (s6 + s8) * C81;
+            t1 = r5 - r1;
+            r5 = r5 + r1;
+            r8 = r7 - r6;
+            r7 = r7 + r6;
+            t2 = s5 - s1;
+            s5 = s5 + s1;
+            s8 = s7 - s6;
+            s7 = s7 + s6;
+            r1 = r5 + s7;
+            r5 = r5 - s7;
+            r6 = t1 + s8;
+            t1 = t1 - s8;
+            s1 = s5 - r7;
+            s5 = s5 + r7;
+            s6 = t2 - r8;
+            t2 = t2 + r8;
+            p1 = co2 * r1;
+            p2 = si2 * s1;
+            p3 = co2 * s1;
+            p4 = si2 * r1;
+            pSrc[2 * i2]     = p1 + p2;
+            pSrc[2 * i2 + 1] = p3 - p4;
+            p1 = co8 * r5;
+            p2 = si8 * s5;
+            p3 = co8 * s5;
+            p4 = si8 * r5;
+            pSrc[2 * i8]     = p1 + p2;
+            pSrc[2 * i8 + 1] = p3 - p4;
+            p1 = co6 * r6;
+            p2 = si6 * s6;
+            p3 = co6 * s6;
+            p4 = si6 * r6;
+            pSrc[2 * i6]     = p1 + p2;
+            pSrc[2 * i6 + 1] = p3 - p4;
+            p1 = co4 * t1;
+            p2 = si4 * t2;
+            p3 = co4 * t2;
+            p4 = si4 * t1;
+            pSrc[2 * i4]     = p1 + p2;
+            pSrc[2 * i4 + 1] = p3 - p4;
+            
+            i1 += n1;
+         } while(i1 < fftLen);
+         
+         j++;
+      } while(j < n2);
+      
+      twidCoefModifier <<= 3;
+   } while(n2 > 7);   
+}
+
+/**    
+* @} end of Radix8_CFFT_CIFFT group    
+*/
--- a/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_dct4_f32.c
+++ b/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_dct4_f32.c
@ -0,0 +1,461 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010-2013 ARM Limited. All rights reserved.    
+*    
+* $Date:        17. January 2013  
+* $Revision: 	V1.4.1  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:	    arm_dct4_f32.c    
+*    
+* Description:	Processing function of DCT4 & IDCT4 F32.    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Redistribution and use in source and binary forms, with or without 
+* modification, are permitted provided that the following conditions
+* are met:
+*   - Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*   - Redistributions in binary form must reproduce the above copyright
+*     notice, this list of conditions and the following disclaimer in
+*     the documentation and/or other materials provided with the 
+*     distribution.
+*   - Neither the name of ARM LIMITED nor the names of its contributors
+*     may be used to endorse or promote products derived from this
+*     software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+* POSSIBILITY OF SUCH DAMAGE.      
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @ingroup groupTransforms    
+ */
+
+/**    
+ * @defgroup DCT4_IDCT4 DCT Type IV Functions    
+ * Representation of signals by minimum number of values is important for storage and transmission.    
+ * The possibility of large discontinuity between the beginning and end of a period of a signal    
+ * in DFT can be avoided by extending the signal so that it is even-symmetric.    
+ * Discrete Cosine Transform (DCT) is constructed such that its energy is heavily concentrated in the lower part of the    
+ * spectrum and is very widely used in signal and image coding applications.    
+ * The family of DCTs (DCT type- 1,2,3,4) is the outcome of different combinations of homogeneous boundary conditions.    
+ * DCT has an excellent energy-packing capability, hence has many applications and in data compression in particular.    
+ *    
+ * DCT is essentially the Discrete Fourier Transform(DFT) of an even-extended real signal.    
+ * Reordering of the input data makes the computation of DCT just a problem of    
+ * computing the DFT of a real signal with a few additional operations.    
+ * This approach provides regular, simple, and very efficient DCT algorithms for practical hardware and software implementations.    
+ *     
+ * DCT type-II can be implemented using Fast fourier transform (FFT) internally, as the transform is applied on real values, Real FFT can be used.    
+ * DCT4 is implemented using DCT2 as their implementations are similar except with some added pre-processing and post-processing.    
+ * DCT2 implementation can be described in the following steps:    
+ * - Re-ordering input    
+ * - Calculating Real FFT    
+ * - Multiplication of weights and Real FFT output and getting real part from the product.    
+ *    
+ * This process is explained by the block diagram below:    
+ * \image html DCT4.gif "Discrete Cosine Transform - type-IV"    
+ *    
+ * \par Algorithm:    
+ * The N-point type-IV DCT is defined as a real, linear transformation by the formula:    
+ * \image html DCT4Equation.gif    
+ * where <code>k = 0,1,2,.....N-1</code>    
+ *\par    
+ * Its inverse is defined as follows:    
+ * \image html IDCT4Equation.gif    
+ * where <code>n = 0,1,2,.....N-1</code>    
+ *\par    
+ * The DCT4 matrices become involutory (i.e. they are self-inverse) by multiplying with an overall scale factor of sqrt(2/N).    
+ * The symmetry of the transform matrix indicates that the fast algorithms for the forward    
+ * and inverse transform computation are identical.    
+ * Note that the implementation of Inverse DCT4 and DCT4 is same, hence same process function can be used for both.    
+ *    
+ * \par Lengths supported by the transform:    
+ *  As DCT4 internally uses Real FFT, it supports all the lengths supported by arm_rfft_f32().    
+ * The library provides separate functions for Q15, Q31, and floating-point data types.    
+ * \par Instance Structure    
+ * The instances for Real FFT and FFT, cosine values table and twiddle factor table are stored in an instance data structure.    
+ * A separate instance structure must be defined for each transform.    
+ * There are separate instance structure declarations for each of the 3 supported data types.    
+ *    
+ * \par Initialization Functions    
+ * There is also an associated initialization function for each data type.    
+ * The initialization function performs the following operations:    
+ * - Sets the values of the internal structure fields.    
+ * - Initializes Real FFT as its process function is used internally in DCT4, by calling arm_rfft_init_f32().    
+ * \par    
+ * Use of the initialization function is optional.    
+ * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.    
+ * To place an instance structure into a const data section, the instance structure must be manually initialized.    
+ * Manually initialize the instance structure as follows:    
+ * <pre>    
+ *arm_dct4_instance_f32 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft};    
+ *arm_dct4_instance_q31 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft};   
+ *arm_dct4_instance_q15 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft};   
+ * </pre>   
+ * where \c N is the length of the DCT4; \c Nby2 is half of the length of the DCT4;   
+ * \c normalize is normalizing factor used and is equal to <code>sqrt(2/N)</code>;    
+ * \c pTwiddle points to the twiddle factor table;   
+ * \c pCosFactor points to the cosFactor table;   
+ * \c pRfft points to the real FFT instance;   
+ * \c pCfft points to the complex FFT instance;   
+ * The CFFT and RFFT structures also needs to be initialized, refer to arm_cfft_radix4_f32()   
+ * and arm_rfft_f32() respectively for details regarding static initialization.   
+ *   
+ * \par Fixed-Point Behavior    
+ * Care must be taken when using the fixed-point versions of the DCT4 transform functions.    
+ * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.    
+ * Refer to the function specific documentation below for usage guidelines.    
+ */
+
+ /**    
+ * @addtogroup DCT4_IDCT4    
+ * @{    
+ */
+
+/**    
+ * @brief Processing function for the floating-point DCT4/IDCT4.   
+ * @param[in]       *S             points to an instance of the floating-point DCT4/IDCT4 structure.   
+ * @param[in]       *pState        points to state buffer.   
+ * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer.   
+ * @return none.   
+ */
+
+void arm_dct4_f32(
+  const arm_dct4_instance_f32 * S,
+  float32_t * pState,
+  float32_t * pInlineBuffer)
+{
+  uint32_t i;                                    /* Loop counter */
+  float32_t *weights = S->pTwiddle;              /* Pointer to the Weights table */
+  float32_t *cosFact = S->pCosFactor;            /* Pointer to the cos factors table */
+  float32_t *pS1, *pS2, *pbuff;                  /* Temporary pointers for input buffer and pState buffer */
+  float32_t in;                                  /* Temporary variable */
+
+
+  /* DCT4 computation involves DCT2 (which is calculated using RFFT)    
+   * along with some pre-processing and post-processing.    
+   * Computational procedure is explained as follows:    
+   * (a) Pre-processing involves multiplying input with cos factor,    
+   *     r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*n))    
+   *              where,    
+   *                 r(n) -- output of preprocessing    
+   *                 u(n) -- input to preprocessing(actual Source buffer)    
+   * (b) Calculation of DCT2 using FFT is divided into three steps:    
+   *                  Step1: Re-ordering of even and odd elements of input.    
+   *                  Step2: Calculating FFT of the re-ordered input.    
+   *                  Step3: Taking the real part of the product of FFT output and weights.    
+   * (c) Post-processing - DCT4 can be obtained from DCT2 output using the following equation:    
+   *                   Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)    
+   *                        where,    
+   *                           Y4 -- DCT4 output,   Y2 -- DCT2 output    
+   * (d) Multiplying the output with the normalizing factor sqrt(2/N).    
+   */
+
+        /*-------- Pre-processing ------------*/
+  /* Multiplying input with cos factor i.e. r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*n)) */
+  arm_scale_f32(pInlineBuffer, 2.0f, pInlineBuffer, S->N);
+  arm_mult_f32(pInlineBuffer, cosFact, pInlineBuffer, S->N);
+
+  /* ----------------------------------------------------------------    
+   * Step1: Re-ordering of even and odd elements as,    
+   *             pState[i] =  pInlineBuffer[2*i] and    
+   *             pState[N-i-1] = pInlineBuffer[2*i+1] where i = 0 to N/2    
+   ---------------------------------------------------------------------*/
+
+  /* pS1 initialized to pState */
+  pS1 = pState;
+
+  /* pS2 initialized to pState+N-1, so that it points to the end of the state buffer */
+  pS2 = pState + (S->N - 1u);
+
+  /* pbuff initialized to input buffer */
+  pbuff = pInlineBuffer;
+
+#ifndef ARM_MATH_CM0_FAMILY
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  /* Initializing the loop counter to N/2 >> 2 for loop unrolling by 4 */
+  i = (uint32_t) S->Nby2 >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  do
+  {
+    /* Re-ordering of even and odd elements */
+    /* pState[i] =  pInlineBuffer[2*i] */
+    *pS1++ = *pbuff++;
+    /* pState[N-i-1] = pInlineBuffer[2*i+1] */
+    *pS2-- = *pbuff++;
+
+    *pS1++ = *pbuff++;
+    *pS2-- = *pbuff++;
+
+    *pS1++ = *pbuff++;
+    *pS2-- = *pbuff++;
+
+    *pS1++ = *pbuff++;
+    *pS2-- = *pbuff++;
+
+    /* Decrement the loop counter */
+    i--;
+  } while(i > 0u);
+
+  /* pbuff initialized to input buffer */
+  pbuff = pInlineBuffer;
+
+  /* pS1 initialized to pState */
+  pS1 = pState;
+
+  /* Initializing the loop counter to N/4 instead of N for loop unrolling */
+  i = (uint32_t) S->N >> 2u;
+
+  /* Processing with loop unrolling 4 times as N is always multiple of 4.    
+   * Compute 4 outputs at a time */
+  do
+  {
+    /* Writing the re-ordered output back to inplace input buffer */
+    *pbuff++ = *pS1++;
+    *pbuff++ = *pS1++;
+    *pbuff++ = *pS1++;
+    *pbuff++ = *pS1++;
+
+    /* Decrement the loop counter */
+    i--;
+  } while(i > 0u);
+
+
+  /* ---------------------------------------------------------    
+   *     Step2: Calculate RFFT for N-point input    
+   * ---------------------------------------------------------- */
+  /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
+  arm_rfft_f32(S->pRfft, pInlineBuffer, pState);
+
+        /*----------------------------------------------------------------------    
+	 *  Step3: Multiply the FFT output with the weights.    
+	 *----------------------------------------------------------------------*/
+  arm_cmplx_mult_cmplx_f32(pState, weights, pState, S->N);
+
+  /* ----------- Post-processing ---------- */
+  /* DCT-IV can be obtained from DCT-II by the equation,    
+   *       Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)    
+   *       Hence, Y4(0) = Y2(0)/2  */
+  /* Getting only real part from the output and Converting to DCT-IV */
+
+  /* Initializing the loop counter to N >> 2 for loop unrolling by 4 */
+  i = ((uint32_t) S->N - 1u) >> 2u;
+
+  /* pbuff initialized to input buffer. */
+  pbuff = pInlineBuffer;
+
+  /* pS1 initialized to pState */
+  pS1 = pState;
+
+  /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
+  in = *pS1++ * (float32_t) 0.5;
+  /* input buffer acts as inplace, so output values are stored in the input itself. */
+  *pbuff++ = in;
+
+  /* pState pointer is incremented twice as the real values are located alternatively in the array */
+  pS1++;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  do
+  {
+    /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
+    /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
+    in = *pS1++ - in;
+    *pbuff++ = in;
+    /* points to the next real value */
+    pS1++;
+
+    in = *pS1++ - in;
+    *pbuff++ = in;
+    pS1++;
+
+    in = *pS1++ - in;
+    *pbuff++ = in;
+    pS1++;
+
+    in = *pS1++ - in;
+    *pbuff++ = in;
+    pS1++;
+
+    /* Decrement the loop counter */
+    i--;
+  } while(i > 0u);
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  i = ((uint32_t) S->N - 1u) % 0x4u;
+
+  while(i > 0u)
+  {
+    /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
+    /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
+    in = *pS1++ - in;
+    *pbuff++ = in;
+    /* points to the next real value */
+    pS1++;
+
+    /* Decrement the loop counter */
+    i--;
+  }
+
+
+        /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
+
+  /* Initializing the loop counter to N/4 instead of N for loop unrolling */
+  i = (uint32_t) S->N >> 2u;
+
+  /* pbuff initialized to the pInlineBuffer(now contains the output values) */
+  pbuff = pInlineBuffer;
+
+  /* Processing with loop unrolling 4 times as N is always multiple of 4.  Compute 4 outputs at a time */
+  do
+  {
+    /* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
+    in = *pbuff;
+    *pbuff++ = in * S->normalize;
+
+    in = *pbuff;
+    *pbuff++ = in * S->normalize;
+
+    in = *pbuff;
+    *pbuff++ = in * S->normalize;
+
+    in = *pbuff;
+    *pbuff++ = in * S->normalize;
+
+    /* Decrement the loop counter */
+    i--;
+  } while(i > 0u);
+
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Initializing the loop counter to N/2 */
+  i = (uint32_t) S->Nby2;
+
+  do
+  {
+    /* Re-ordering of even and odd elements */
+    /* pState[i] =  pInlineBuffer[2*i] */
+    *pS1++ = *pbuff++;
+    /* pState[N-i-1] = pInlineBuffer[2*i+1] */
+    *pS2-- = *pbuff++;
+
+    /* Decrement the loop counter */
+    i--;
+  } while(i > 0u);
+
+  /* pbuff initialized to input buffer */
+  pbuff = pInlineBuffer;
+
+  /* pS1 initialized to pState */
+  pS1 = pState;
+
+  /* Initializing the loop counter */
+  i = (uint32_t) S->N;
+
+  do
+  {
+    /* Writing the re-ordered output back to inplace input buffer */
+    *pbuff++ = *pS1++;
+
+    /* Decrement the loop counter */
+    i--;
+  } while(i > 0u);
+
+
+  /* ---------------------------------------------------------    
+   *     Step2: Calculate RFFT for N-point input    
+   * ---------------------------------------------------------- */
+  /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
+  arm_rfft_f32(S->pRfft, pInlineBuffer, pState);
+
+        /*----------------------------------------------------------------------    
+	 *  Step3: Multiply the FFT output with the weights.    
+	 *----------------------------------------------------------------------*/
+  arm_cmplx_mult_cmplx_f32(pState, weights, pState, S->N);
+
+  /* ----------- Post-processing ---------- */
+  /* DCT-IV can be obtained from DCT-II by the equation,    
+   *       Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)    
+   *       Hence, Y4(0) = Y2(0)/2  */
+  /* Getting only real part from the output and Converting to DCT-IV */
+
+  /* pbuff initialized to input buffer. */
+  pbuff = pInlineBuffer;
+
+  /* pS1 initialized to pState */
+  pS1 = pState;
+
+  /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
+  in = *pS1++ * (float32_t) 0.5;
+  /* input buffer acts as inplace, so output values are stored in the input itself. */
+  *pbuff++ = in;
+
+  /* pState pointer is incremented twice as the real values are located alternatively in the array */
+  pS1++;
+
+  /* Initializing the loop counter */
+  i = ((uint32_t) S->N - 1u);
+
+  do
+  {
+    /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
+    /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
+    in = *pS1++ - in;
+    *pbuff++ = in;
+    /* points to the next real value */
+    pS1++;
+
+
+    /* Decrement the loop counter */
+    i--;
+  } while(i > 0u);
+
+
+        /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
+
+  /* Initializing the loop counter */
+  i = (uint32_t) S->N;
+
+  /* pbuff initialized to the pInlineBuffer(now contains the output values) */
+  pbuff = pInlineBuffer;
+
+  do
+  {
+    /* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
+    in = *pbuff;
+    *pbuff++ = in * S->normalize;
+
+    /* Decrement the loop counter */
+    i--;
+  } while(i > 0u);
+
+#endif /* #ifndef ARM_MATH_CM0_FAMILY */
+
+}
+
+/**    
+   * @} end of DCT4_IDCT4 group    
+   */
--- a/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_dct4_init_f32.c
+++ b/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_dct4_init_f32.c
--- a/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_dct4_init_q15.c
+++ b/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_dct4_init_q15.c
--- a/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_dct4_init_q31.c
+++ b/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_dct4_init_q31.c
--- a/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_dct4_q15.c
+++ b/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_dct4_q15.c
@ -0,0 +1,394 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010-2013 ARM Limited. All rights reserved.    
+*    
+* $Date:        17. January 2013  
+* $Revision: 	V1.4.1  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:	    arm_dct4_q15.c    
+*    
+* Description:	Processing function of DCT4 & IDCT4 Q15.    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Redistribution and use in source and binary forms, with or without 
+* modification, are permitted provided that the following conditions
+* are met:
+*   - Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*   - Redistributions in binary form must reproduce the above copyright
+*     notice, this list of conditions and the following disclaimer in
+*     the documentation and/or other materials provided with the 
+*     distribution.
+*   - Neither the name of ARM LIMITED nor the names of its contributors
+*     may be used to endorse or promote products derived from this
+*     software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+* POSSIBILITY OF SUCH DAMAGE.  
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @addtogroup DCT4_IDCT4    
+ * @{    
+ */
+
+/**    
+ * @brief Processing function for the Q15 DCT4/IDCT4.   
+ * @param[in]       *S             points to an instance of the Q15 DCT4 structure.   
+ * @param[in]       *pState        points to state buffer.   
+ * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer.   
+ * @return none.   
+ *     
+ * \par Input an output formats:    
+ * Internally inputs are downscaled in the RFFT process function to avoid overflows.    
+ * Number of bits downscaled, depends on the size of the transform.    
+ * The input and output formats for different DCT sizes and number of bits to upscale are mentioned in the table below:     
+ *    
+ * \image html dct4FormatsQ15Table.gif    
+ */
+
+void arm_dct4_q15(
+  const arm_dct4_instance_q15 * S,
+  q15_t * pState,
+  q15_t * pInlineBuffer)
+{
+  uint32_t i;                                    /* Loop counter */
+  q15_t *weights = S->pTwiddle;                  /* Pointer to the Weights table */
+  q15_t *cosFact = S->pCosFactor;                /* Pointer to the cos factors table */
+  q15_t *pS1, *pS2, *pbuff;                      /* Temporary pointers for input buffer and pState buffer */
+  q15_t in;                                      /* Temporary variable */
+
+
+  /* DCT4 computation involves DCT2 (which is calculated using RFFT)    
+   * along with some pre-processing and post-processing.    
+   * Computational procedure is explained as follows:    
+   * (a) Pre-processing involves multiplying input with cos factor,    
+   *     r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*n))    
+   *              where,    
+   *                 r(n) -- output of preprocessing    
+   *                 u(n) -- input to preprocessing(actual Source buffer)    
+   * (b) Calculation of DCT2 using FFT is divided into three steps:    
+   *                  Step1: Re-ordering of even and odd elements of input.    
+   *                  Step2: Calculating FFT of the re-ordered input.    
+   *                  Step3: Taking the real part of the product of FFT output and weights.    
+   * (c) Post-processing - DCT4 can be obtained from DCT2 output using the following equation:    
+   *                   Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)    
+   *                        where,    
+   *                           Y4 -- DCT4 output,   Y2 -- DCT2 output    
+   * (d) Multiplying the output with the normalizing factor sqrt(2/N).    
+   */
+
+        /*-------- Pre-processing ------------*/
+  /* Multiplying input with cos factor i.e. r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*n)) */
+  arm_mult_q15(pInlineBuffer, cosFact, pInlineBuffer, S->N);
+  arm_shift_q15(pInlineBuffer, 1, pInlineBuffer, S->N);
+
+  /* ----------------------------------------------------------------    
+   * Step1: Re-ordering of even and odd elements as    
+   *             pState[i] =  pInlineBuffer[2*i] and    
+   *             pState[N-i-1] = pInlineBuffer[2*i+1] where i = 0 to N/2    
+   ---------------------------------------------------------------------*/
+
+  /* pS1 initialized to pState */
+  pS1 = pState;
+
+  /* pS2 initialized to pState+N-1, so that it points to the end of the state buffer */
+  pS2 = pState + (S->N - 1u);
+
+  /* pbuff initialized to input buffer */
+  pbuff = pInlineBuffer;
+
+
+#ifndef ARM_MATH_CM0_FAMILY
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  /* Initializing the loop counter to N/2 >> 2 for loop unrolling by 4 */
+  i = (uint32_t) S->Nby2 >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  do
+  {
+    /* Re-ordering of even and odd elements */
+    /* pState[i] =  pInlineBuffer[2*i] */
+    *pS1++ = *pbuff++;
+    /* pState[N-i-1] = pInlineBuffer[2*i+1] */
+    *pS2-- = *pbuff++;
+
+    *pS1++ = *pbuff++;
+    *pS2-- = *pbuff++;
+
+    *pS1++ = *pbuff++;
+    *pS2-- = *pbuff++;
+
+    *pS1++ = *pbuff++;
+    *pS2-- = *pbuff++;
+
+    /* Decrement the loop counter */
+    i--;
+  } while(i > 0u);
+
+  /* pbuff initialized to input buffer */
+  pbuff = pInlineBuffer;
+
+  /* pS1 initialized to pState */
+  pS1 = pState;
+
+  /* Initializing the loop counter to N/4 instead of N for loop unrolling */
+  i = (uint32_t) S->N >> 2u;
+
+  /* Processing with loop unrolling 4 times as N is always multiple of 4.    
+   * Compute 4 outputs at a time */
+  do
+  {
+    /* Writing the re-ordered output back to inplace input buffer */
+    *pbuff++ = *pS1++;
+    *pbuff++ = *pS1++;
+    *pbuff++ = *pS1++;
+    *pbuff++ = *pS1++;
+
+    /* Decrement the loop counter */
+    i--;
+  } while(i > 0u);
+
+
+  /* ---------------------------------------------------------    
+   *     Step2: Calculate RFFT for N-point input    
+   * ---------------------------------------------------------- */
+  /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
+  arm_rfft_q15(S->pRfft, pInlineBuffer, pState);
+
+ /*----------------------------------------------------------------------    
+  *  Step3: Multiply the FFT output with the weights.    
+  *----------------------------------------------------------------------*/
+  arm_cmplx_mult_cmplx_q15(pState, weights, pState, S->N);
+
+  /* The output of complex multiplication is in 3.13 format.    
+   * Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.15 format by shifting left by 2 bits. */
+  arm_shift_q15(pState, 2, pState, S->N * 2);
+
+  /* ----------- Post-processing ---------- */
+  /* DCT-IV can be obtained from DCT-II by the equation,    
+   *       Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)    
+   *       Hence, Y4(0) = Y2(0)/2  */
+  /* Getting only real part from the output and Converting to DCT-IV */
+
+  /* Initializing the loop counter to N >> 2 for loop unrolling by 4 */
+  i = ((uint32_t) S->N - 1u) >> 2u;
+
+  /* pbuff initialized to input buffer. */
+  pbuff = pInlineBuffer;
+
+  /* pS1 initialized to pState */
+  pS1 = pState;
+
+  /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
+  in = *pS1++ >> 1u;
+  /* input buffer acts as inplace, so output values are stored in the input itself. */
+  *pbuff++ = in;
+
+  /* pState pointer is incremented twice as the real values are located alternatively in the array */
+  pS1++;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  do
+  {
+    /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
+    /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
+    in = *pS1++ - in;
+    *pbuff++ = in;
+    /* points to the next real value */
+    pS1++;
+
+    in = *pS1++ - in;
+    *pbuff++ = in;
+    pS1++;
+
+    in = *pS1++ - in;
+    *pbuff++ = in;
+    pS1++;
+
+    in = *pS1++ - in;
+    *pbuff++ = in;
+    pS1++;
+
+    /* Decrement the loop counter */
+    i--;
+  } while(i > 0u);
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  i = ((uint32_t) S->N - 1u) % 0x4u;
+
+  while(i > 0u)
+  {
+    /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
+    /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
+    in = *pS1++ - in;
+    *pbuff++ = in;
+    /* points to the next real value */
+    pS1++;
+
+    /* Decrement the loop counter */
+    i--;
+  }
+
+
+   /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
+
+  /* Initializing the loop counter to N/4 instead of N for loop unrolling */
+  i = (uint32_t) S->N >> 2u;
+
+  /* pbuff initialized to the pInlineBuffer(now contains the output values) */
+  pbuff = pInlineBuffer;
+
+  /* Processing with loop unrolling 4 times as N is always multiple of 4.  Compute 4 outputs at a time */
+  do
+  {
+    /* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
+    in = *pbuff;
+    *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
+
+    in = *pbuff;
+    *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
+
+    in = *pbuff;
+    *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
+
+    in = *pbuff;
+    *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
+
+    /* Decrement the loop counter */
+    i--;
+  } while(i > 0u);
+
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Initializing the loop counter to N/2 */
+  i = (uint32_t) S->Nby2;
+
+  do
+  {
+    /* Re-ordering of even and odd elements */
+    /* pState[i] =  pInlineBuffer[2*i] */
+    *pS1++ = *pbuff++;
+    /* pState[N-i-1] = pInlineBuffer[2*i+1] */
+    *pS2-- = *pbuff++;
+
+    /* Decrement the loop counter */
+    i--;
+  } while(i > 0u);
+
+  /* pbuff initialized to input buffer */
+  pbuff = pInlineBuffer;
+
+  /* pS1 initialized to pState */
+  pS1 = pState;
+
+  /* Initializing the loop counter */
+  i = (uint32_t) S->N;
+
+  do
+  {
+    /* Writing the re-ordered output back to inplace input buffer */
+    *pbuff++ = *pS1++;
+
+    /* Decrement the loop counter */
+    i--;
+  } while(i > 0u);
+
+
+  /* ---------------------------------------------------------    
+   *     Step2: Calculate RFFT for N-point input    
+   * ---------------------------------------------------------- */
+  /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
+  arm_rfft_q15(S->pRfft, pInlineBuffer, pState);
+
+ /*----------------------------------------------------------------------    
+  *  Step3: Multiply the FFT output with the weights.    
+  *----------------------------------------------------------------------*/
+  arm_cmplx_mult_cmplx_q15(pState, weights, pState, S->N);
+
+  /* The output of complex multiplication is in 3.13 format.    
+   * Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.15 format by shifting left by 2 bits. */
+  arm_shift_q15(pState, 2, pState, S->N * 2);
+
+  /* ----------- Post-processing ---------- */
+  /* DCT-IV can be obtained from DCT-II by the equation,    
+   *       Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)    
+   *       Hence, Y4(0) = Y2(0)/2  */
+  /* Getting only real part from the output and Converting to DCT-IV */
+
+  /* Initializing the loop counter */
+  i = ((uint32_t) S->N - 1u);
+
+  /* pbuff initialized to input buffer. */
+  pbuff = pInlineBuffer;
+
+  /* pS1 initialized to pState */
+  pS1 = pState;
+
+  /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
+  in = *pS1++ >> 1u;
+  /* input buffer acts as inplace, so output values are stored in the input itself. */
+  *pbuff++ = in;
+
+  /* pState pointer is incremented twice as the real values are located alternatively in the array */
+  pS1++;
+
+  do
+  {
+    /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
+    /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
+    in = *pS1++ - in;
+    *pbuff++ = in;
+    /* points to the next real value */
+    pS1++;
+
+    /* Decrement the loop counter */
+    i--;
+  } while(i > 0u);
+
+   /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
+
+  /* Initializing the loop counter */
+  i = (uint32_t) S->N;
+
+  /* pbuff initialized to the pInlineBuffer(now contains the output values) */
+  pbuff = pInlineBuffer;
+
+  do
+  {
+    /* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
+    in = *pbuff;
+    *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
+
+    /* Decrement the loop counter */
+    i--;
+  } while(i > 0u);
+
+#endif /* #ifndef ARM_MATH_CM0_FAMILY */
+
+}
+
+/**    
+   * @} end of DCT4_IDCT4 group    
+   */
--- a/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_dct4_q31.c
+++ b/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_dct4_q31.c
@ -0,0 +1,395 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010-2013 ARM Limited. All rights reserved.    
+*    
+* $Date:        17. January 2013  
+* $Revision: 	V1.4.1  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:	    arm_dct4_q31.c    
+*    
+* Description:	Processing function of DCT4 & IDCT4 Q31.    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Redistribution and use in source and binary forms, with or without 
+* modification, are permitted provided that the following conditions
+* are met:
+*   - Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*   - Redistributions in binary form must reproduce the above copyright
+*     notice, this list of conditions and the following disclaimer in
+*     the documentation and/or other materials provided with the 
+*     distribution.
+*   - Neither the name of ARM LIMITED nor the names of its contributors
+*     may be used to endorse or promote products derived from this
+*     software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+* POSSIBILITY OF SUCH DAMAGE.    
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+/**    
+ * @addtogroup DCT4_IDCT4    
+ * @{    
+ */
+
+/**    
+ * @brief Processing function for the Q31 DCT4/IDCT4.   
+ * @param[in]       *S             points to an instance of the Q31 DCT4 structure.   
+ * @param[in]       *pState        points to state buffer.   
+ * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer.   
+ * @return none.   
+ * \par Input an output formats:    
+ * Input samples need to be downscaled by 1 bit to avoid saturations in the Q31 DCT process,    
+ * as the conversion from DCT2 to DCT4 involves one subtraction.    
+ * Internally inputs are downscaled in the RFFT process function to avoid overflows.    
+ * Number of bits downscaled, depends on the size of the transform.    
+ * The input and output formats for different DCT sizes and number of bits to upscale are mentioned in the table below:     
+ *    
+ * \image html dct4FormatsQ31Table.gif    
+ */
+
+void arm_dct4_q31(
+  const arm_dct4_instance_q31 * S,
+  q31_t * pState,
+  q31_t * pInlineBuffer)
+{
+  uint16_t i;                                    /* Loop counter */
+  q31_t *weights = S->pTwiddle;                  /* Pointer to the Weights table */
+  q31_t *cosFact = S->pCosFactor;                /* Pointer to the cos factors table */
+  q31_t *pS1, *pS2, *pbuff;                      /* Temporary pointers for input buffer and pState buffer */
+  q31_t in;                                      /* Temporary variable */
+
+
+  /* DCT4 computation involves DCT2 (which is calculated using RFFT)    
+   * along with some pre-processing and post-processing.    
+   * Computational procedure is explained as follows:    
+   * (a) Pre-processing involves multiplying input with cos factor,    
+   *     r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*n))    
+   *              where,    
+   *                 r(n) -- output of preprocessing    
+   *                 u(n) -- input to preprocessing(actual Source buffer)    
+   * (b) Calculation of DCT2 using FFT is divided into three steps:    
+   *                  Step1: Re-ordering of even and odd elements of input.    
+   *                  Step2: Calculating FFT of the re-ordered input.    
+   *                  Step3: Taking the real part of the product of FFT output and weights.    
+   * (c) Post-processing - DCT4 can be obtained from DCT2 output using the following equation:    
+   *                   Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)    
+   *                        where,    
+   *                           Y4 -- DCT4 output,   Y2 -- DCT2 output    
+   * (d) Multiplying the output with the normalizing factor sqrt(2/N).    
+   */
+
+        /*-------- Pre-processing ------------*/
+  /* Multiplying input with cos factor i.e. r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*n)) */
+  arm_mult_q31(pInlineBuffer, cosFact, pInlineBuffer, S->N);
+  arm_shift_q31(pInlineBuffer, 1, pInlineBuffer, S->N);
+
+  /* ----------------------------------------------------------------    
+   * Step1: Re-ordering of even and odd elements as    
+   *             pState[i] =  pInlineBuffer[2*i] and    
+   *             pState[N-i-1] = pInlineBuffer[2*i+1] where i = 0 to N/2    
+   ---------------------------------------------------------------------*/
+
+  /* pS1 initialized to pState */
+  pS1 = pState;
+
+  /* pS2 initialized to pState+N-1, so that it points to the end of the state buffer */
+  pS2 = pState + (S->N - 1u);
+
+  /* pbuff initialized to input buffer */
+  pbuff = pInlineBuffer;
+
+#ifndef ARM_MATH_CM0_FAMILY
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  /* Initializing the loop counter to N/2 >> 2 for loop unrolling by 4 */
+  i = S->Nby2 >> 2u;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  do
+  {
+    /* Re-ordering of even and odd elements */
+    /* pState[i] =  pInlineBuffer[2*i] */
+    *pS1++ = *pbuff++;
+    /* pState[N-i-1] = pInlineBuffer[2*i+1] */
+    *pS2-- = *pbuff++;
+
+    *pS1++ = *pbuff++;
+    *pS2-- = *pbuff++;
+
+    *pS1++ = *pbuff++;
+    *pS2-- = *pbuff++;
+
+    *pS1++ = *pbuff++;
+    *pS2-- = *pbuff++;
+
+    /* Decrement the loop counter */
+    i--;
+  } while(i > 0u);
+
+  /* pbuff initialized to input buffer */
+  pbuff = pInlineBuffer;
+
+  /* pS1 initialized to pState */
+  pS1 = pState;
+
+  /* Initializing the loop counter to N/4 instead of N for loop unrolling */
+  i = S->N >> 2u;
+
+  /* Processing with loop unrolling 4 times as N is always multiple of 4.    
+   * Compute 4 outputs at a time */
+  do
+  {
+    /* Writing the re-ordered output back to inplace input buffer */
+    *pbuff++ = *pS1++;
+    *pbuff++ = *pS1++;
+    *pbuff++ = *pS1++;
+    *pbuff++ = *pS1++;
+
+    /* Decrement the loop counter */
+    i--;
+  } while(i > 0u);
+
+
+  /* ---------------------------------------------------------    
+   *     Step2: Calculate RFFT for N-point input    
+   * ---------------------------------------------------------- */
+  /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
+  arm_rfft_q31(S->pRfft, pInlineBuffer, pState);
+
+  /*----------------------------------------------------------------------    
+   *  Step3: Multiply the FFT output with the weights.    
+   *----------------------------------------------------------------------*/
+  arm_cmplx_mult_cmplx_q31(pState, weights, pState, S->N);
+
+  /* The output of complex multiplication is in 3.29 format.    
+   * Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.31 format by shifting left by 2 bits. */
+  arm_shift_q31(pState, 2, pState, S->N * 2);
+
+  /* ----------- Post-processing ---------- */
+  /* DCT-IV can be obtained from DCT-II by the equation,    
+   *       Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)    
+   *       Hence, Y4(0) = Y2(0)/2  */
+  /* Getting only real part from the output and Converting to DCT-IV */
+
+  /* Initializing the loop counter to N >> 2 for loop unrolling by 4 */
+  i = (S->N - 1u) >> 2u;
+
+  /* pbuff initialized to input buffer. */
+  pbuff = pInlineBuffer;
+
+  /* pS1 initialized to pState */
+  pS1 = pState;
+
+  /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
+  in = *pS1++ >> 1u;
+  /* input buffer acts as inplace, so output values are stored in the input itself. */
+  *pbuff++ = in;
+
+  /* pState pointer is incremented twice as the real values are located alternatively in the array */
+  pS1++;
+
+  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.    
+   ** a second loop below computes the remaining 1 to 3 samples. */
+  do
+  {
+    /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
+    /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
+    in = *pS1++ - in;
+    *pbuff++ = in;
+    /* points to the next real value */
+    pS1++;
+
+    in = *pS1++ - in;
+    *pbuff++ = in;
+    pS1++;
+
+    in = *pS1++ - in;
+    *pbuff++ = in;
+    pS1++;
+
+    in = *pS1++ - in;
+    *pbuff++ = in;
+    pS1++;
+
+    /* Decrement the loop counter */
+    i--;
+  } while(i > 0u);
+
+  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.    
+   ** No loop unrolling is used. */
+  i = (S->N - 1u) % 0x4u;
+
+  while(i > 0u)
+  {
+    /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
+    /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
+    in = *pS1++ - in;
+    *pbuff++ = in;
+    /* points to the next real value */
+    pS1++;
+
+    /* Decrement the loop counter */
+    i--;
+  }
+
+
+        /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
+
+  /* Initializing the loop counter to N/4 instead of N for loop unrolling */
+  i = S->N >> 2u;
+
+  /* pbuff initialized to the pInlineBuffer(now contains the output values) */
+  pbuff = pInlineBuffer;
+
+  /* Processing with loop unrolling 4 times as N is always multiple of 4.  Compute 4 outputs at a time */
+  do
+  {
+    /* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
+    in = *pbuff;
+    *pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));
+
+    in = *pbuff;
+    *pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));
+
+    in = *pbuff;
+    *pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));
+
+    in = *pbuff;
+    *pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));
+
+    /* Decrement the loop counter */
+    i--;
+  } while(i > 0u);
+
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  /* Initializing the loop counter to N/2 */
+  i = S->Nby2;
+
+  do
+  {
+    /* Re-ordering of even and odd elements */
+    /* pState[i] =  pInlineBuffer[2*i] */
+    *pS1++ = *pbuff++;
+    /* pState[N-i-1] = pInlineBuffer[2*i+1] */
+    *pS2-- = *pbuff++;
+
+    /* Decrement the loop counter */
+    i--;
+  } while(i > 0u);
+
+  /* pbuff initialized to input buffer */
+  pbuff = pInlineBuffer;
+
+  /* pS1 initialized to pState */
+  pS1 = pState;
+
+  /* Initializing the loop counter */
+  i = S->N;
+
+  do
+  {
+    /* Writing the re-ordered output back to inplace input buffer */
+    *pbuff++ = *pS1++;
+
+    /* Decrement the loop counter */
+    i--;
+  } while(i > 0u);
+
+
+  /* ---------------------------------------------------------    
+   *     Step2: Calculate RFFT for N-point input    
+   * ---------------------------------------------------------- */
+  /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
+  arm_rfft_q31(S->pRfft, pInlineBuffer, pState);
+
+  /*----------------------------------------------------------------------    
+   *  Step3: Multiply the FFT output with the weights.    
+   *----------------------------------------------------------------------*/
+  arm_cmplx_mult_cmplx_q31(pState, weights, pState, S->N);
+
+  /* The output of complex multiplication is in 3.29 format.    
+   * Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.31 format by shifting left by 2 bits. */
+  arm_shift_q31(pState, 2, pState, S->N * 2);
+
+  /* ----------- Post-processing ---------- */
+  /* DCT-IV can be obtained from DCT-II by the equation,    
+   *       Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)    
+   *       Hence, Y4(0) = Y2(0)/2  */
+  /* Getting only real part from the output and Converting to DCT-IV */
+
+  /* pbuff initialized to input buffer. */
+  pbuff = pInlineBuffer;
+
+  /* pS1 initialized to pState */
+  pS1 = pState;
+
+  /* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
+  in = *pS1++ >> 1u;
+  /* input buffer acts as inplace, so output values are stored in the input itself. */
+  *pbuff++ = in;
+
+  /* pState pointer is incremented twice as the real values are located alternatively in the array */
+  pS1++;
+
+  /* Initializing the loop counter */
+  i = (S->N - 1u);
+
+  while(i > 0u)
+  {
+    /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
+    /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
+    in = *pS1++ - in;
+    *pbuff++ = in;
+    /* points to the next real value */
+    pS1++;
+
+    /* Decrement the loop counter */
+    i--;
+  }
+
+
+        /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
+
+  /* Initializing the loop counter */
+  i = S->N;
+
+  /* pbuff initialized to the pInlineBuffer(now contains the output values) */
+  pbuff = pInlineBuffer;
+
+  do
+  {
+    /* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
+    in = *pbuff;
+    *pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));
+
+    /* Decrement the loop counter */
+    i--;
+  } while(i > 0u);
+
+#endif /* #ifndef ARM_MATH_CM0_FAMILY */
+
+}
+
+/**    
+   * @} end of DCT4_IDCT4 group    
+   */
--- a/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_rfft_f32.c
+++ b/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_rfft_f32.c
@ -0,0 +1,329 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010-2013 ARM Limited. All rights reserved.    
+*    
+* $Date:        17. January 2013  
+* $Revision: 	V1.4.1  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:	    arm_rfft_f32.c    
+*    
+* Description:	RFFT & RIFFT Floating point process function    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Redistribution and use in source and binary forms, with or without 
+* modification, are permitted provided that the following conditions
+* are met:
+*   - Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*   - Redistributions in binary form must reproduce the above copyright
+*     notice, this list of conditions and the following disclaimer in
+*     the documentation and/or other materials provided with the 
+*     distribution.
+*   - Neither the name of ARM LIMITED nor the names of its contributors
+*     may be used to endorse or promote products derived from this
+*     software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+* POSSIBILITY OF SUCH DAMAGE.  
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+extern void arm_radix4_butterfly_f32(
+    float32_t * pSrc,
+    uint16_t fftLen,
+    float32_t * pCoef,
+    uint16_t twidCoefModifier);
+
+extern void arm_radix4_butterfly_inverse_f32(
+    float32_t * pSrc,
+    uint16_t fftLen,
+    float32_t * pCoef,
+    uint16_t twidCoefModifier,
+    float32_t onebyfftLen);
+
+extern void arm_bitreversal_f32(
+    float32_t * pSrc,
+    uint16_t fftSize,
+    uint16_t bitRevFactor,
+    uint16_t * pBitRevTab);
+
+/**    
+ * @ingroup groupTransforms    
+ */
+
+/*--------------------------------------------------------------------    
+ *		Internal functions prototypes    
+ *--------------------------------------------------------------------*/
+
+void arm_split_rfft_f32(
+  float32_t * pSrc,
+  uint32_t fftLen,
+  float32_t * pATable,
+  float32_t * pBTable,
+  float32_t * pDst,
+  uint32_t modifier);
+void arm_split_rifft_f32(
+  float32_t * pSrc,
+  uint32_t fftLen,
+  float32_t * pATable,
+  float32_t * pBTable,
+  float32_t * pDst,
+  uint32_t modifier);
+
+/**    
+ * @addtogroup RealFFT    
+ * @{    
+ */
+
+/**    
+ * @brief Processing function for the floating-point RFFT/RIFFT.   
+ * @deprecated Do not use this function.  It has been superceded by \ref arm_rfft_fast_f32 and will be removed
+ * in the future.
+ * @param[in]  *S    points to an instance of the floating-point RFFT/RIFFT structure.   
+ * @param[in]  *pSrc points to the input buffer.   
+ * @param[out] *pDst points to the output buffer.   
+ * @return none.   
+ */
+
+void arm_rfft_f32(
+  const arm_rfft_instance_f32 * S,
+  float32_t * pSrc,
+  float32_t * pDst)
+{
+  const arm_cfft_radix4_instance_f32 *S_CFFT = S->pCfft;
+
+
+  /* Calculation of Real IFFT of input */
+  if(S->ifftFlagR == 1u)
+  {
+    /*  Real IFFT core process */
+    arm_split_rifft_f32(pSrc, S->fftLenBy2, S->pTwiddleAReal,
+                        S->pTwiddleBReal, pDst, S->twidCoefRModifier);
+
+
+    /* Complex radix-4 IFFT process */
+    arm_radix4_butterfly_inverse_f32(pDst, S_CFFT->fftLen,
+                                     S_CFFT->pTwiddle,
+                                     S_CFFT->twidCoefModifier,
+                                     S_CFFT->onebyfftLen);
+
+    /* Bit reversal process */
+    if(S->bitReverseFlagR == 1u)
+    {
+      arm_bitreversal_f32(pDst, S_CFFT->fftLen,
+                          S_CFFT->bitRevFactor, S_CFFT->pBitRevTable);
+    }
+  }
+  else
+  {
+
+    /* Calculation of RFFT of input */
+
+    /* Complex radix-4 FFT process */
+    arm_radix4_butterfly_f32(pSrc, S_CFFT->fftLen,
+                             S_CFFT->pTwiddle, S_CFFT->twidCoefModifier);
+
+    /* Bit reversal process */
+    if(S->bitReverseFlagR == 1u)
+    {
+      arm_bitreversal_f32(pSrc, S_CFFT->fftLen,
+                          S_CFFT->bitRevFactor, S_CFFT->pBitRevTable);
+    }
+
+
+    /*  Real FFT core process */
+    arm_split_rfft_f32(pSrc, S->fftLenBy2, S->pTwiddleAReal,
+                       S->pTwiddleBReal, pDst, S->twidCoefRModifier);
+  }
+
+}
+
+/**    
+   * @} end of RealFFT group    
+   */
+
+/**    
+ * @brief  Core Real FFT process    
+ * @param[in]   *pSrc 				points to the input buffer.    
+ * @param[in]   fftLen  			length of FFT.    
+ * @param[in]   *pATable 			points to the twiddle Coef A buffer.    
+ * @param[in]   *pBTable 			points to the twiddle Coef B buffer.    
+ * @param[out]  *pDst 				points to the output buffer.    
+ * @param[in]   modifier 	        twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.   
+ * @return none.    
+ */
+
+void arm_split_rfft_f32(
+  float32_t * pSrc,
+  uint32_t fftLen,
+  float32_t * pATable,
+  float32_t * pBTable,
+  float32_t * pDst,
+  uint32_t modifier)
+{
+  uint32_t i;                                    /* Loop Counter */
+  float32_t outR, outI;                          /* Temporary variables for output */
+  float32_t *pCoefA, *pCoefB;                    /* Temporary pointers for twiddle factors */
+  float32_t CoefA1, CoefA2, CoefB1;              /* Temporary variables for twiddle coefficients */
+  float32_t *pDst1 = &pDst[2], *pDst2 = &pDst[(4u * fftLen) - 1u];      /* temp pointers for output buffer */
+  float32_t *pSrc1 = &pSrc[2], *pSrc2 = &pSrc[(2u * fftLen) - 1u];      /* temp pointers for input buffer */
+
+  /* Init coefficient pointers */
+  pCoefA = &pATable[modifier * 2u];
+  pCoefB = &pBTable[modifier * 2u];
+
+  i = fftLen - 1u;
+
+  while(i > 0u)
+  {
+    /*    
+       outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]    
+       + pSrc[2 * n - 2 * i] * pBTable[2 * i] +    
+       pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);    
+     */
+
+    /* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +    
+       pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -    
+       pIn[2 * n - 2 * i + 1] * pBTable[2 * i]); */
+
+    /* read pATable[2 * i] */
+    CoefA1 = *pCoefA++;
+    /* pATable[2 * i + 1] */
+    CoefA2 = *pCoefA;
+
+    /* pSrc[2 * i] * pATable[2 * i] */
+    outR = *pSrc1 * CoefA1;
+    /* pSrc[2 * i] * CoefA2 */
+    outI = *pSrc1++ * CoefA2;
+
+    /* (pSrc[2 * i + 1] + pSrc[2 * fftLen - 2 * i + 1]) * CoefA2 */
+    outR -= (*pSrc1 + *pSrc2) * CoefA2;
+    /* pSrc[2 * i + 1] * CoefA1 */
+    outI += *pSrc1++ * CoefA1;
+
+    CoefB1 = *pCoefB;
+
+    /* pSrc[2 * fftLen - 2 * i + 1] * CoefB1 */
+    outI -= *pSrc2-- * CoefB1;
+    /* pSrc[2 * fftLen - 2 * i] * CoefA2 */
+    outI -= *pSrc2 * CoefA2;
+
+    /* pSrc[2 * fftLen - 2 * i] * CoefB1 */
+    outR += *pSrc2-- * CoefB1;
+
+    /* write output */
+    *pDst1++ = outR;
+    *pDst1++ = outI;
+
+    /* write complex conjugate output */
+    *pDst2-- = -outI;
+    *pDst2-- = outR;
+
+    /* update coefficient pointer */
+    pCoefB = pCoefB + (modifier * 2u);
+    pCoefA = pCoefA + ((modifier * 2u) - 1u);
+
+    i--;
+
+  }
+
+  pDst[2u * fftLen] = pSrc[0] - pSrc[1];
+  pDst[(2u * fftLen) + 1u] = 0.0f;
+
+  pDst[0] = pSrc[0] + pSrc[1];
+  pDst[1] = 0.0f;
+
+}
+
+
+/**    
+ * @brief  Core Real IFFT process    
+ * @param[in]   *pSrc 				points to the input buffer.    
+ * @param[in]   fftLen  			length of FFT.   
+ * @param[in]   *pATable 			points to the twiddle Coef A buffer.   
+ * @param[in]   *pBTable 			points to the twiddle Coef B buffer.   
+ * @param[out]  *pDst 				points to the output buffer.   
+ * @param[in]   modifier 	        twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.    
+ * @return none.    
+ */
+
+void arm_split_rifft_f32(
+  float32_t * pSrc,
+  uint32_t fftLen,
+  float32_t * pATable,
+  float32_t * pBTable,
+  float32_t * pDst,
+  uint32_t modifier)
+{
+  float32_t outR, outI;                          /* Temporary variables for output */
+  float32_t *pCoefA, *pCoefB;                    /* Temporary pointers for twiddle factors */
+  float32_t CoefA1, CoefA2, CoefB1;              /* Temporary variables for twiddle coefficients */
+  float32_t *pSrc1 = &pSrc[0], *pSrc2 = &pSrc[(2u * fftLen) + 1u];
+
+  pCoefA = &pATable[0];
+  pCoefB = &pBTable[0];
+
+  while(fftLen > 0u)
+  {
+    /*    
+       outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +    
+       pIn[2 * n - 2 * i] * pBTable[2 * i] -    
+       pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);    
+
+       outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -    
+       pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -    
+       pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);    
+
+     */
+
+    CoefA1 = *pCoefA++;
+    CoefA2 = *pCoefA;
+
+    /* outR = (pSrc[2 * i] * CoefA1 */
+    outR = *pSrc1 * CoefA1;
+
+    /* - pSrc[2 * i] * CoefA2 */
+    outI = -(*pSrc1++) * CoefA2;
+
+    /* (pSrc[2 * i + 1] + pSrc[2 * fftLen - 2 * i + 1]) * CoefA2 */
+    outR += (*pSrc1 + *pSrc2) * CoefA2;
+
+    /* pSrc[2 * i + 1] * CoefA1 */
+    outI += (*pSrc1++) * CoefA1;
+
+    CoefB1 = *pCoefB;
+
+    /* - pSrc[2 * fftLen - 2 * i + 1] * CoefB1 */
+    outI -= *pSrc2-- * CoefB1;
+
+    /* pSrc[2 * fftLen - 2 * i] * CoefB1 */
+    outR += *pSrc2 * CoefB1;
+
+    /* pSrc[2 * fftLen - 2 * i] * CoefA2 */
+    outI += *pSrc2-- * CoefA2;
+
+    /* write output */
+    *pDst++ = outR;
+    *pDst++ = outI;
+
+    /* update coefficient pointer */
+    pCoefB = pCoefB + (modifier * 2u);
+    pCoefA = pCoefA + ((modifier * 2u) - 1u);
+
+    /* Decrement loop count */
+    fftLen--;
+  }
+
+}
--- a/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_rfft_fast_f32.c
+++ b/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_rfft_fast_f32.c
@ -0,0 +1,354 @@
+/* ----------------------------------------------------------------------
+* Copyright (C) 2010-2013 ARM Limited. All rights reserved.
+*
+* $Date:        17. January 2013
+* $Revision: 	V1.4.1
+*
+* Project: 	    CMSIS DSP Library
+* Title:	    arm_rfft_f32.c
+*
+* Description:	RFFT & RIFFT Floating point process function
+*
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*   - Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*   - Redistributions in binary form must reproduce the above copyright
+*     notice, this list of conditions and the following disclaimer in
+*     the documentation and/or other materials provided with the
+*     distribution.
+*   - Neither the name of ARM LIMITED nor the names of its contributors
+*     may be used to endorse or promote products derived from this
+*     software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+* POSSIBILITY OF SUCH DAMAGE.
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+void stage_rfft_f32(
+  arm_rfft_fast_instance_f32 * S,
+  float32_t * p, float32_t * pOut)
+{
+   uint32_t  k;								   /* Loop Counter                     */
+   float32_t twR, twI;						   /* RFFT Twiddle coefficients        */
+   float32_t * pCoeff = S->pTwiddleRFFT;  /* Points to RFFT Twiddle factors   */
+   float32_t *pA = p;						   /* increasing pointer               */
+   float32_t *pB = p;						   /* decreasing pointer               */
+   float32_t xAR, xAI, xBR, xBI;				/* temporary variables              */
+   float32_t t1a, t1b;				         /* temporary variables              */
+   float32_t p0, p1, p2, p3;				   /* temporary variables              */
+
+
+   k = (S->Sint).fftLen - 1;					
+
+   /* Pack first and last sample of the frequency domain together */
+
+   xBR = pB[0];
+   xBI = pB[1];
+   xAR = pA[0];
+   xAI = pA[1];
+
+   twR = *pCoeff++ ;
+   twI = *pCoeff++ ;
+   
+   // U1 = XA(1) + XB(1); % It is real
+   t1a = xBR + xAR  ;
+   
+   // U2 = XB(1) - XA(1); % It is imaginary
+   t1b = xBI + xAI  ;
+
+   // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
+   // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
+   *pOut++ = 0.5f * ( t1a + t1b );
+   *pOut++ = 0.5f * ( t1a - t1b );
+
+   // XA(1) = 1/2*( U1 - imag(U2) +  i*( U1 +imag(U2) ));
+   pB  = p + 2*k;
+   pA += 2;
+
+   do
+   {
+      /*
+         function X = my_split_rfft(X, ifftFlag)
+         % X is a series of real numbers
+         L  = length(X);
+         XC = X(1:2:end) +i*X(2:2:end);
+         XA = fft(XC);
+         XB = conj(XA([1 end:-1:2]));
+         TW = i*exp(-2*pi*i*[0:L/2-1]/L).';
+         for l = 2:L/2
+            XA(l) = 1/2 * (XA(l) + XB(l) + TW(l) * (XB(l) - XA(l)));
+         end
+         XA(1) = 1/2* (XA(1) + XB(1) + TW(1) * (XB(1) - XA(1))) + i*( 1/2*( XA(1) + XB(1) + i*( XA(1) - XB(1))));
+         X = XA;
+      */
+
+      xBI = pB[1];
+      xBR = pB[0];
+      xAR = pA[0];
+      xAI = pA[1];
+
+      twR = *pCoeff++;
+      twI = *pCoeff++;
+
+      t1a = xBR - xAR ;
+      t1b = xBI + xAI ;
+
+      // real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
+      // imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
+      p0 = twR * t1a;
+      p1 = twI * t1a;
+      p2 = twR * t1b;
+      p3 = twI * t1b;
+
+      *pOut++ = 0.5f * (xAR + xBR + p0 + p3 ); //xAR
+      *pOut++ = 0.5f * (xAI - xBI + p1 - p2 ); //xAI
+
+      pA += 2;
+      pB -= 2;
+      k--;
+   } while(k > 0u);
+}
+
+/* Prepares data for inverse cfft */
+void merge_rfft_f32(
+arm_rfft_fast_instance_f32 * S,
+float32_t * p, float32_t * pOut)
+{
+   uint32_t  k;								/* Loop Counter                     */
+   float32_t twR, twI;						/* RFFT Twiddle coefficients        */
+   float32_t *pCoeff = S->pTwiddleRFFT;		/* Points to RFFT Twiddle factors   */
+   float32_t *pA = p;						/* increasing pointer               */
+   float32_t *pB = p;						/* decreasing pointer               */
+   float32_t xAR, xAI, xBR, xBI;			/* temporary variables              */
+   float32_t t1a, t1b, r, s, t, u;			/* temporary variables              */
+
+   k = (S->Sint).fftLen - 1;					
+
+   xAR = pA[0];
+   xAI = pA[1];
+
+   pCoeff += 2 ;
+
+   *pOut++ = 0.5f * ( xAR + xAI );
+   *pOut++ = 0.5f * ( xAR - xAI );
+
+   pB  =  p + 2*k ;
+   pA +=  2	   ;
+
+   while(k > 0u)
+   {
+      /* G is half of the frequency complex spectrum */
+      //for k = 2:N
+      //    Xk(k) = 1/2 * (G(k) + conj(G(N-k+2)) + Tw(k)*( G(k) - conj(G(N-k+2))));
+      xBI =   pB[1]    ;
+      xBR =   pB[0]    ;
+      xAR =  pA[0];
+      xAI =  pA[1];
+
+      twR = *pCoeff++;
+      twI = *pCoeff++;
+
+      t1a = xAR - xBR ;
+      t1b = xAI + xBI ;
+
+      r = twR * t1a;
+      s = twI * t1b;
+      t = twI * t1a;
+      u = twR * t1b;
+
+      // real(tw * (xA - xB)) = twR * (xAR - xBR) - twI * (xAI - xBI);
+      // imag(tw * (xA - xB)) = twI * (xAR - xBR) + twR * (xAI - xBI);
+      *pOut++ = 0.5f * (xAR + xBR - r - s ); //xAR
+      *pOut++ = 0.5f * (xAI - xBI + t - u ); //xAI
+
+      pA += 2;
+      pB -= 2;
+      k--;
+   }
+
+}
+
+/**
+* @ingroup groupTransforms
+*/
+
+/**
+ * @defgroup Fast Real FFT Functions
+ *
+ * \par
+ * The CMSIS DSP library includes specialized algorithms for computing the
+ * FFT of real data sequences.  The FFT is defined over complex data but
+ * in many applications the input is real.  Real FFT algorithms take advantage
+ * of the symmetry properties of the FFT and have a speed advantage over complex
+ * algorithms of the same length.
+ * \par
+ * The Fast RFFT algorith relays on the mixed radix CFFT that save processor usage.
+ * \par
+ * The real length N forward FFT of a sequence is computed using the steps shown below.
+ * \par
+ * \image html RFFT.gif "Real Fast Fourier Transform"
+ * \par
+ * The real sequence is initially treated as if it were complex to perform a CFFT.
+ * Later, a processing stage reshapes the data to obtain half of the frequency spectrum
+ * in complex format. Except the first complex number that contains the two real numbers
+ * X[0] and X[N/2] all the data is complex. In other words, the first complex sample
+ * contains two real values packed.
+ * \par
+ * The input for the inverse RFFT should keep the same format as the output of the 
+ * forward RFFT. A first processing stage pre-process the data to later perform an
+ * inverse CFFT.
+ * \par    
+ * \image html RIFFT.gif "Real Inverse Fast Fourier Transform"    
+ * \par    
+ * The algorithms for floating-point, Q15, and Q31 data are slightly different
+ * and we describe each algorithm in turn.
+ * \par Floating-point
+ * The main functions are <code>arm_rfft_fast_f32()</code>
+ * and <code>arm_rfft_fast_init_f32()</code>.  The older functions
+ * <code>arm_rfft_f32()</code> and <code>arm_rfft_init_f32()</code> have been
+ * deprecated but are still documented.
+ * \par
+ * The FFT of a real N-point sequence has even symmetry in the frequency
+ * domain.  The second half of the data equals the conjugate of the first half
+ * flipped in frequency:
+ * <pre>
+ *X[0] - real data
+ *X[1] - complex data
+ *X[2] - complex data
+ *... 
+ *X[fftLen/2-1] - complex data
+ *X[fftLen/2] - real data
+ *X[fftLen/2+1] - conjugate of X[fftLen/2-1]
+ *X[fftLen/2+2] - conjugate of X[fftLen/2-2]
+ *... 
+ *X[fftLen-1] - conjugate of X[1]
+ * </pre>
+ * Looking at the data, we see that we can uniquely represent the FFT using only
+ * <pre>
+ *N/2+1 samples:
+ *X[0] - real data
+ *X[1] - complex data
+ *X[2] - complex data
+ *... 
+ *X[fftLen/2-1] - complex data
+ *X[fftLen/2] - real data
+ * </pre>
+ * Looking more closely we see that the first and last samples are real valued.
+ * They can be packed together and we can thus represent the FFT of an N-point
+ * real sequence by N/2 complex values:
+ * <pre>
+ *X[0],X[N/2] - packed real data: X[0] + jX[N/2]
+ *X[1] - complex data
+ *X[2] - complex data
+ *... 
+ *X[fftLen/2-1] - complex data
+ * </pre>
+ * The real FFT functions pack the frequency domain data in this fashion.  The
+ * forward transform outputs the data in this form and the inverse transform
+ * expects input data in this form.  The function always performs the needed
+ * bitreversal so that the input and output data is always in normal order.  The 
+ * functions support lengths of [32, 64, 128, ..., 4096] samples.
+ * \par
+ * The forward and inverse real FFT functions apply the standard FFT scaling; no
+ * scaling on the forward transform and 1/fftLen scaling on the inverse
+ * transform.
+ * \par Q15 and Q31
+ * The real algorithms are defined in a similar manner and utilize N/2 complex
+ * transforms behind the scenes.  In the case of fixed-point data, a radix-4
+ * complex transform is performed and this limits the allows sequence lengths to
+ * 128, 512, and 2048 samples.
+ * \par
+ * TBD.  We need to document input and output order of data.
+ * \par
+ * The complex transforms used internally include scaling to prevent fixed-point
+ * overflows.  The overall scaling equals 1/(fftLen/2).
+ * \par
+ * A separate instance structure must be defined for each transform used but 
+ * twiddle factor and bit reversal tables can be reused.
+ * \par
+ * There is also an associated initialization function for each data type. 
+ * The initialization function performs the following operations:
+ * - Sets the values of the internal structure fields.   
+ * - Initializes twiddle factor table and bit reversal table pointers.
+ * - Initializes the internal complex FFT data structure.
+ * \par   
+ * Use of the initialization function is optional.   
+ * However, if the initialization function is used, then the instance structure 
+ * cannot be placed into a const data section. To place an instance structure 
+ * into a const data section, the instance structure should be manually 
+ * initialized as follows:
+ * <pre>
+ *arm_rfft_instance_q31 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};    
+ *arm_rfft_instance_q15 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};    
+ * </pre>
+ * where <code>fftLenReal</code> is the length of the real transform;
+ * <code>fftLenBy2</code> length of  the internal complex transform.
+ * <code>ifftFlagR</code> Selects forward (=0) or inverse (=1) transform.
+ * <code>bitReverseFlagR</code> Selects bit reversed output (=0) or normal order
+ * output (=1).
+ * <code>twidCoefRModifier</code> stride modifier for the twiddle factor table.
+ * The value is based on the FFT length;
+ * <code>pTwiddleAReal</code>points to the A array of twiddle coefficients; 
+ * <code>pTwiddleBReal</code>points to the B array of twiddle coefficients;    
+ * <code>pCfft</code> points to the CFFT Instance structure. The CFFT structure
+ * must also be initialized.  Refer to arm_cfft_radix4_f32() for details regarding    
+ * static initialization of the complex FFT instance structure.    
+ */
+
+/**
+* @addtogroup RealFFT
+* @{
+*/
+
+/**
+* @brief Processing function for the floating-point real FFT.
+* @param[in]  *S              points to an arm_rfft_fast_instance_f32 structure.
+* @param[in]  *p              points to the input buffer.
+* @param[in]  *pOut           points to an arm_rfft_fast_instance_f32 structure.
+* @param[in]  ifftFlag        RFFT if flag is 0, RIFFT if flag is 1
+* @return none.
+*/
+
+void arm_rfft_fast_f32(
+arm_rfft_fast_instance_f32 * S,
+float32_t * p, float32_t * pOut,
+uint8_t ifftFlag)
+{
+   arm_cfft_instance_f32 * Sint = &(S->Sint);
+   Sint->fftLen = S->fftLenRFFT / 2;
+
+   /* Calculation of Real FFT */
+   if(ifftFlag)
+   {
+      /*  Real FFT comression */
+      merge_rfft_f32(S, p, pOut);
+
+      /* Complex radix-4 IFFT process */
+      arm_cfft_f32( Sint, pOut, ifftFlag, 1);
+   }
+   else
+   {
+      /* Calculation of RFFT of input */
+      arm_cfft_f32( Sint, p, ifftFlag, 1);
+   
+      /*  Real FFT extraction */
+      stage_rfft_f32(S, p, pOut);
+   }
+}
+
--- a/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_rfft_fast_init_f32.c
+++ b/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_rfft_fast_init_f32.c
@ -0,0 +1,139 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010-2013 ARM Limited. All rights reserved.    
+*    
+* $Date:        17. January 2013  
+* $Revision: 	V1.4.1  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:	    arm_cfft_init_f32.c   
+*    
+* Description:	Split Radix Decimation in Frequency CFFT Floating point processing function   
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Redistribution and use in source and binary forms, with or without 
+* modification, are permitted provided that the following conditions
+* are met:
+*   - Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*   - Redistributions in binary form must reproduce the above copyright
+*     notice, this list of conditions and the following disclaimer in
+*     the documentation and/or other materials provided with the 
+*     distribution.
+*   - Neither the name of ARM LIMITED nor the names of its contributors
+*     may be used to endorse or promote products derived from this
+*     software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+* POSSIBILITY OF SUCH DAMAGE.   
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+#include "arm_common_tables.h"
+
+/**   
+ * @ingroup groupTransforms   
+ */
+
+/**   
+ * @addtogroup RealFFT   
+ * @{   
+ */
+
+/**   
+* @brief  Initialization function for the floating-point real FFT.  
+* @param[in,out] *S             points to an arm_rfft_fast_instance_f32 structure.
+* @param[in]     fftLen         length of the Real Sequence.  
+* @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.  
+*   
+* \par Description:  
+* \par   
+* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.   
+* Set(=1) ifftFlag for calculation of CIFFT otherwise  RFFT is calculated  
+* \par   
+* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.   
+* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.   
+* \par   
+* The parameter <code>fftLen</code>	Specifies length of RFFT/CIFFT process. Supported FFT Lengths are 16, 32, 64, 128, 256, 512, 1024, 2048, 4096.   
+* \par   
+* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.   
+*/
+arm_status arm_rfft_fast_init_f32(
+  arm_rfft_fast_instance_f32 * S,
+  uint16_t fftLen)
+{
+  arm_cfft_instance_f32 * Sint;
+  /*  Initialise the default arm status */
+  arm_status status = ARM_MATH_SUCCESS;
+  /*  Initialise the FFT length */
+  Sint = &(S->Sint);
+  Sint->fftLen = fftLen/2;
+  S->fftLenRFFT = fftLen;
+  /*  Initialise the Twiddle coefficient pointer */
+  //  S->pTwiddle = (float32_t *) twiddleCoef;
+
+  /*  Initializations of structure parameters depending on the FFT length */
+  switch (Sint->fftLen)
+  {
+  case 4096u:
+    /*  Initializations of structure parameters for 4096 point FFT */
+    /*  Initialise the bit reversal table length */
+    Sint->bitRevLength = ARMBITREVINDEXTABLE4096_TABLE_LENGTH;
+    /*  Initialise the bit reversal table pointer */
+    Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable4096;
+    /*  Initialise the 1/fftLen Value */
+    break;
+  case 2048u:
+    Sint->bitRevLength = ARMBITREVINDEXTABLE2048_TABLE_LENGTH;
+    Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable2048;
+    break;
+  case 1024u:
+    Sint->bitRevLength = ARMBITREVINDEXTABLE1024_TABLE_LENGTH;
+    Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable1024;
+    break;
+  case 512u:
+    Sint->bitRevLength = ARMBITREVINDEXTABLE_512_TABLE_LENGTH;
+    Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable512;
+    break;
+  case 256u:
+    Sint->bitRevLength = ARMBITREVINDEXTABLE_256_TABLE_LENGTH;
+    Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable256;
+    break;
+  case 128u:
+    Sint->bitRevLength = ARMBITREVINDEXTABLE_128_TABLE_LENGTH;
+    Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable128;
+    break;
+  case 64u:
+    Sint->bitRevLength = ARMBITREVINDEXTABLE__64_TABLE_LENGTH;
+    Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable64;
+    break;
+  case 32u:
+    Sint->bitRevLength = ARMBITREVINDEXTABLE__32_TABLE_LENGTH;
+    Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable32;
+    break;
+  case 16u:
+    Sint->bitRevLength = ARMBITREVINDEXTABLE__16_TABLE_LENGTH;
+    Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable16;
+    break;
+  default:
+    /*  Reporting argument error if fftSize is not valid value */
+    status = ARM_MATH_ARGUMENT_ERROR;
+    break;
+  }
+
+  return (status);
+}
+
+/**   
+ * @} end of RealFFT group   
+ */
--- a/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_rfft_init_f32.c
+++ b/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_rfft_init_f32.c
--- a/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_rfft_init_q15.c
+++ b/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_rfft_init_q15.c
--- a/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_rfft_init_q31.c
+++ b/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_rfft_init_q31.c
--- a/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_rfft_q15.c
+++ b/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_rfft_q15.c
@ -0,0 +1,482 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010-2013 ARM Limited. All rights reserved.    
+*    
+* $Date:        17. January 2013  
+* $Revision: 	V1.4.1  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:	    arm_rfft_q15.c    
+*    
+* Description:	RFFT & RIFFT Q15 process function    
+*    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Redistribution and use in source and binary forms, with or without 
+* modification, are permitted provided that the following conditions
+* are met:
+*   - Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*   - Redistributions in binary form must reproduce the above copyright
+*     notice, this list of conditions and the following disclaimer in
+*     the documentation and/or other materials provided with the 
+*     distribution.
+*   - Neither the name of ARM LIMITED nor the names of its contributors
+*     may be used to endorse or promote products derived from this
+*     software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+* POSSIBILITY OF SUCH DAMAGE.     
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+void arm_radix4_butterfly_q15(
+  q15_t * pSrc16,
+  uint32_t fftLen,
+  q15_t * pCoef16,
+  uint32_t twidCoefModifier);
+
+void arm_radix4_butterfly_inverse_q15(
+  q15_t * pSrc16,
+  uint32_t fftLen,
+  q15_t * pCoef16,
+  uint32_t twidCoefModifier);
+
+void arm_bitreversal_q15(
+  q15_t * pSrc,
+  uint32_t fftLen,
+  uint16_t bitRevFactor,
+  uint16_t * pBitRevTab);
+	
+	/*--------------------------------------------------------------------    
+*		Internal functions prototypes    
+--------------------------------------------------------------------*/
+
+void arm_split_rfft_q15(
+  q15_t * pSrc,
+  uint32_t fftLen,
+  q15_t * pATable,
+  q15_t * pBTable,
+  q15_t * pDst,
+  uint32_t modifier);
+
+void arm_split_rifft_q15(
+  q15_t * pSrc,
+  uint32_t fftLen,
+  q15_t * pATable,
+  q15_t * pBTable,
+  q15_t * pDst,
+  uint32_t modifier);
+
+/**    
+ * @addtogroup RealFFT    
+ * @{    
+ */
+
+/**    
+ * @brief Processing function for the Q15 RFFT/RIFFT.   
+ * @param[in]  *S    points to an instance of the Q15 RFFT/RIFFT structure.   
+ * @param[in]  *pSrc points to the input buffer.   
+ * @param[out] *pDst points to the output buffer.   
+ * @return none.   
+ *    
+ * \par Input an output formats:   
+ * \par    
+ * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.    
+ * Hence the output format is different for different RFFT sizes.    
+ * The input and output formats for different RFFT sizes and number of bits to upscale are mentioned in the tables below for RFFT and RIFFT:   
+ * \par    
+ * \image html RFFTQ15.gif "Input and Output Formats for Q15 RFFT"    
+ * \par    
+ * \image html RIFFTQ15.gif "Input and Output Formats for Q15 RIFFT"    
+ */
+
+void arm_rfft_q15(
+  const arm_rfft_instance_q15 * S,
+  q15_t * pSrc,
+  q15_t * pDst)
+{
+  const arm_cfft_radix4_instance_q15 *S_CFFT = S->pCfft;
+
+  /* Calculation of RIFFT of input */
+  if(S->ifftFlagR == 1u)
+  {
+    /*  Real IFFT core process */
+    arm_split_rifft_q15(pSrc, S->fftLenBy2, S->pTwiddleAReal,
+                        S->pTwiddleBReal, pDst, S->twidCoefRModifier);
+
+    /* Complex readix-4 IFFT process */
+    arm_radix4_butterfly_inverse_q15(pDst, S_CFFT->fftLen,
+                                     S_CFFT->pTwiddle,
+                                     S_CFFT->twidCoefModifier);
+
+    /* Bit reversal process */
+    if(S->bitReverseFlagR == 1u)
+    {
+      arm_bitreversal_q15(pDst, S_CFFT->fftLen,
+                          S_CFFT->bitRevFactor, S_CFFT->pBitRevTable);
+    }
+  }
+  else
+  {
+    /* Calculation of RFFT of input */
+
+    /* Complex readix-4 FFT process */
+    arm_radix4_butterfly_q15(pSrc, S_CFFT->fftLen,
+                             S_CFFT->pTwiddle, S_CFFT->twidCoefModifier);
+
+    /* Bit reversal process */
+    if(S->bitReverseFlagR == 1u)
+    {
+      arm_bitreversal_q15(pSrc, S_CFFT->fftLen,
+                          S_CFFT->bitRevFactor, S_CFFT->pBitRevTable);
+    }
+
+    arm_split_rfft_q15(pSrc, S->fftLenBy2, S->pTwiddleAReal,
+                       S->pTwiddleBReal, pDst, S->twidCoefRModifier);
+  }
+
+}
+
+  /**    
+   * @} end of RealFFT group    
+   */
+
+/**    
+ * @brief  Core Real FFT process    
+ * @param  *pSrc 				points to the input buffer.   
+ * @param  fftLen  				length of FFT.   
+ * @param  *pATable 			points to the A twiddle Coef buffer.    
+ * @param  *pBTable 			points to the B twiddle Coef buffer.   
+ * @param  *pDst 				points to the output buffer.   
+ * @param  modifier 	        twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.   
+ * @return none.    
+ * The function implements a Real FFT    
+ */
+
+void arm_split_rfft_q15(
+  q15_t * pSrc,
+  uint32_t fftLen,
+  q15_t * pATable,
+  q15_t * pBTable,
+  q15_t * pDst,
+  uint32_t modifier)
+{
+  uint32_t i;                                    /* Loop Counter */
+  q31_t outR, outI;                              /* Temporary variables for output */
+  q15_t *pCoefA, *pCoefB;                        /* Temporary pointers for twiddle factors */
+  q15_t *pSrc1, *pSrc2;
+
+
+//  pSrc[2u * fftLen] = pSrc[0]; 
+//  pSrc[(2u * fftLen) + 1u] = pSrc[1]; 
+
+  pCoefA = &pATable[modifier * 2u];
+  pCoefB = &pBTable[modifier * 2u];
+
+  pSrc1 = &pSrc[2];
+  pSrc2 = &pSrc[(2u * fftLen) - 2u];
+
+#ifndef ARM_MATH_CM0_FAMILY
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  i = 1u;
+
+  while(i < fftLen)
+  {
+    /*    
+       outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]    
+       + pSrc[2 * n - 2 * i] * pBTable[2 * i] +    
+       pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);    
+     */
+
+    /* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +    
+       pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -    
+       pIn[2 * n - 2 * i + 1] * pBTable[2 * i]); */
+
+
+#ifndef ARM_MATH_BIG_ENDIAN
+
+    /* pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1] */
+    outR = __SMUSD(*__SIMD32(pSrc1), *__SIMD32(pCoefA));
+
+#else
+
+    /* -(pSrc[2 * i + 1] * pATable[2 * i + 1] - pSrc[2 * i] * pATable[2 * i]) */
+    outR = -(__SMUSD(*__SIMD32(pSrc1), *__SIMD32(pCoefA)));
+
+#endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
+
+    /* pSrc[2 * n - 2 * i] * pBTable[2 * i] +    
+       pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]) */
+    outR = __SMLAD(*__SIMD32(pSrc2), *__SIMD32(pCoefB), outR) >> 15u;
+
+    /* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -    
+       pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
+
+#ifndef ARM_MATH_BIG_ENDIAN
+
+    outI = __SMUSDX(*__SIMD32(pSrc2)--, *__SIMD32(pCoefB));
+
+#else
+
+    outI = __SMUSDX(*__SIMD32(pCoefB), *__SIMD32(pSrc2)--);
+
+#endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
+
+    /* (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] */
+    outI = __SMLADX(*__SIMD32(pSrc1)++, *__SIMD32(pCoefA), outI);
+
+    /* write output */
+    pDst[2u * i] = (q15_t) outR;
+    pDst[(2u * i) + 1u] = outI >> 15u;
+
+    /* write complex conjugate output */
+    pDst[(4u * fftLen) - (2u * i)] = (q15_t) outR;
+    pDst[((4u * fftLen) - (2u * i)) + 1u] = -(outI >> 15u);
+
+    /* update coefficient pointer */
+    pCoefB = pCoefB + (2u * modifier);
+    pCoefA = pCoefA + (2u * modifier);
+
+    i++;
+
+  }
+
+  pDst[2u * fftLen] = pSrc[0] - pSrc[1];
+  pDst[(2u * fftLen) + 1u] = 0;
+
+  pDst[0] = pSrc[0] + pSrc[1];
+  pDst[1] = 0;
+
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  i = 1u;
+
+  while(i < fftLen)
+  {
+    /*    
+       outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]    
+       + pSrc[2 * n - 2 * i] * pBTable[2 * i] +    
+       pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);    
+     */
+
+    outR = *pSrc1 * *pCoefA;
+    outR = outR - (*(pSrc1 + 1) * *(pCoefA + 1));
+    outR = outR + (*pSrc2 * *pCoefB);
+    outR = (outR + (*(pSrc2 + 1) * *(pCoefB + 1))) >> 15;
+
+
+    /* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +    
+       pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -    
+       pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);   
+     */
+
+    outI = *pSrc2 * *(pCoefB + 1);
+    outI = outI - (*(pSrc2 + 1) * *pCoefB);
+    outI = outI + (*(pSrc1 + 1) * *pCoefA);
+    outI = outI + (*pSrc1 * *(pCoefA + 1));
+
+    /* update input pointers */
+    pSrc1 += 2u;
+    pSrc2 -= 2u;
+
+    /* write output */
+    pDst[2u * i] = (q15_t) outR;
+    pDst[(2u * i) + 1u] = outI >> 15u;
+
+    /* write complex conjugate output */
+    pDst[(4u * fftLen) - (2u * i)] = (q15_t) outR;
+    pDst[((4u * fftLen) - (2u * i)) + 1u] = -(outI >> 15u);
+
+    /* update coefficient pointer */
+    pCoefB = pCoefB + (2u * modifier);
+    pCoefA = pCoefA + (2u * modifier);
+
+    i++;
+
+  }
+
+  pDst[2u * fftLen] = pSrc[0] - pSrc[1];
+  pDst[(2u * fftLen) + 1u] = 0;
+
+  pDst[0] = pSrc[0] + pSrc[1];
+  pDst[1] = 0;
+
+#endif /* #ifndef ARM_MATH_CM0_FAMILY */
+
+}
+
+
+/**    
+ * @brief  Core Real IFFT process    
+ * @param[in]   *pSrc 				points to the input buffer.    
+ * @param[in]   fftLen  		    length of FFT.   
+ * @param[in]   *pATable 			points to the twiddle Coef A buffer.   
+ * @param[in]   *pBTable 			points to the twiddle Coef B buffer.    
+ * @param[out]  *pDst 				points to the output buffer.   
+ * @param[in]   modifier 	        twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.   
+ * @return none.    
+ * The function implements a Real IFFT    
+ */
+void arm_split_rifft_q15(
+  q15_t * pSrc,
+  uint32_t fftLen,
+  q15_t * pATable,
+  q15_t * pBTable,
+  q15_t * pDst,
+  uint32_t modifier)
+{
+  uint32_t i;                                    /* Loop Counter */
+  q31_t outR, outI;                              /* Temporary variables for output */
+  q15_t *pCoefA, *pCoefB;                        /* Temporary pointers for twiddle factors */
+  q15_t *pSrc1, *pSrc2;
+  q15_t *pDst1 = &pDst[0];
+
+  pCoefA = &pATable[0];
+  pCoefB = &pBTable[0];
+
+  pSrc1 = &pSrc[0];
+  pSrc2 = &pSrc[2u * fftLen];
+
+#ifndef ARM_MATH_CM0_FAMILY
+
+  /* Run the below code for Cortex-M4 and Cortex-M3 */
+
+  i = fftLen;
+
+  while(i > 0u)
+  {
+
+    /*    
+       outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +    
+       pIn[2 * n - 2 * i] * pBTable[2 * i] -    
+       pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);    
+
+       outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -    
+       pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -    
+       pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);    
+
+     */
+
+
+#ifndef ARM_MATH_BIG_ENDIAN
+
+    /* pIn[2 * n - 2 * i] * pBTable[2 * i] -    
+       pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]) */
+    outR = __SMUSD(*__SIMD32(pSrc2), *__SIMD32(pCoefB));
+
+#else
+
+    /* -(-pIn[2 * n - 2 * i] * pBTable[2 * i] +  
+       pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1])) */
+    outR = -(__SMUSD(*__SIMD32(pSrc2), *__SIMD32(pCoefB)));
+
+#endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
+
+    /* pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +    
+       pIn[2 * n - 2 * i] * pBTable[2 * i] */
+    outR = __SMLAD(*__SIMD32(pSrc1), *__SIMD32(pCoefA), outR) >> 15u;
+
+    /*    
+       -pIn[2 * n - 2 * i] * pBTable[2 * i + 1] +    
+       pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
+    outI = __SMUADX(*__SIMD32(pSrc2)--, *__SIMD32(pCoefB));
+
+    /* pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] */
+
+#ifndef ARM_MATH_BIG_ENDIAN
+
+    outI = __SMLSDX(*__SIMD32(pCoefA), *__SIMD32(pSrc1)++, -outI);
+
+#else
+
+    outI = __SMLSDX(*__SIMD32(pSrc1)++, *__SIMD32(pCoefA), -outI);
+
+#endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
+    /* write output */
+
+#ifndef ARM_MATH_BIG_ENDIAN
+
+    *__SIMD32(pDst1)++ = __PKHBT(outR, (outI >> 15u), 16);
+
+#else
+
+    *__SIMD32(pDst1)++ = __PKHBT((outI >> 15u), outR, 16);
+
+#endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
+
+    /* update coefficient pointer */
+    pCoefB = pCoefB + (2u * modifier);
+    pCoefA = pCoefA + (2u * modifier);
+
+    i--;
+
+  }
+
+
+#else
+
+  /* Run the below code for Cortex-M0 */
+
+  i = fftLen;
+
+  while(i > 0u)
+  {
+
+    /*    
+       outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +    
+       pIn[2 * n - 2 * i] * pBTable[2 * i] -    
+       pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);    
+     */
+
+    outR = *pSrc2 * *pCoefB;
+    outR = outR - (*(pSrc2 + 1) * *(pCoefB + 1));
+    outR = outR + (*pSrc1 * *pCoefA);
+    outR = (outR + (*(pSrc1 + 1) * *(pCoefA + 1))) >> 15;
+
+    /*   
+       outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -   
+       pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -   
+       pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);   
+     */
+
+    outI = *(pSrc1 + 1) * *pCoefA;
+    outI = outI - (*pSrc1 * *(pCoefA + 1));
+    outI = outI - (*pSrc2 * *(pCoefB + 1));
+    outI = outI - (*(pSrc2 + 1) * *(pCoefB));
+
+    /* update input pointers */
+    pSrc1 += 2u;
+    pSrc2 -= 2u;
+
+    /* write output */
+    *pDst1++ = (q15_t) outR;
+    *pDst1++ = (q15_t) (outI >> 15);
+
+    /* update coefficient pointer */
+    pCoefB = pCoefB + (2u * modifier);
+    pCoefA = pCoefA + (2u * modifier);
+
+    i--;
+
+  }
+
+#endif /* #ifndef ARM_MATH_CM0_FAMILY */
+
+}
--- a/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_rfft_q31.c
+++ b/tool/mbed/mbed-sdk/libraries/dsp/cmsis_dsp/TransformFunctions/arm_rfft_q31.c
@ -0,0 +1,349 @@
+/* ----------------------------------------------------------------------    
+* Copyright (C) 2010-2013 ARM Limited. All rights reserved.    
+*    
+* $Date:        17. January 2013  
+* $Revision: 	V1.4.1  
+*    
+* Project: 	    CMSIS DSP Library    
+* Title:	    arm_rfft_q31.c    
+*    
+* Description:	RFFT & RIFFT Q31 process function    
+*    
+*    
+* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
+*  
+* Redistribution and use in source and binary forms, with or without 
+* modification, are permitted provided that the following conditions
+* are met:
+*   - Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*   - Redistributions in binary form must reproduce the above copyright
+*     notice, this list of conditions and the following disclaimer in
+*     the documentation and/or other materials provided with the 
+*     distribution.
+*   - Neither the name of ARM LIMITED nor the names of its contributors
+*     may be used to endorse or promote products derived from this
+*     software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+* POSSIBILITY OF SUCH DAMAGE.    
+* -------------------------------------------------------------------- */
+
+#include "arm_math.h"
+
+void arm_radix4_butterfly_inverse_q31(
+q31_t * pSrc,
+uint32_t fftLen,
+q31_t * pCoef,
+uint32_t twidCoefModifier);
+
+void arm_radix4_butterfly_q31(
+q31_t * pSrc,
+uint32_t fftLen,
+q31_t * pCoef,
+uint32_t twidCoefModifier);
+
+void arm_bitreversal_q31(
+q31_t * pSrc,
+uint32_t fftLen,
+uint16_t bitRevFactor,
+uint16_t * pBitRevTab);
+
+/*--------------------------------------------------------------------    
+*		Internal functions prototypes    
+--------------------------------------------------------------------*/
+
+void arm_split_rfft_q31(
+  q31_t * pSrc,
+  uint32_t fftLen,
+  q31_t * pATable,
+  q31_t * pBTable,
+  q31_t * pDst,
+  uint32_t modifier);
+
+void arm_split_rifft_q31(
+  q31_t * pSrc,
+  uint32_t fftLen,
+  q31_t * pATable,
+  q31_t * pBTable,
+  q31_t * pDst,
+  uint32_t modifier);
+
+/**    
+ * @addtogroup RealFFT    
+ * @{    
+ */
+
+/**    
+ * @brief Processing function for the Q31 RFFT/RIFFT.   
+ * @param[in]  *S    points to an instance of the Q31 RFFT/RIFFT structure.   
+ * @param[in]  *pSrc points to the input buffer.   
+ * @param[out] *pDst points to the output buffer.   
+ * @return none.   
+ *    
+ * \par Input an output formats:   
+ * \par    
+ * Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.   
+ * Hence the output format is different for different RFFT sizes.    
+ * The input and output formats for different RFFT sizes and number of bits to upscale are mentioned in the tables below for RFFT and RIFFT:   
+ * \par    
+ * \image html RFFTQ31.gif "Input and Output Formats for Q31 RFFT"    
+ *    
+ * \par    
+ * \image html RIFFTQ31.gif "Input and Output Formats for Q31 RIFFT"    
+ */
+
+void arm_rfft_q31(
+  const arm_rfft_instance_q31 * S,
+  q31_t * pSrc,
+  q31_t * pDst)
+{
+  const arm_cfft_radix4_instance_q31 *S_CFFT = S->pCfft;
+
+  /* Calculation of RIFFT of input */
+  if(S->ifftFlagR == 1u)
+  {
+    /*  Real IFFT core process */
+    arm_split_rifft_q31(pSrc, S->fftLenBy2, S->pTwiddleAReal,
+                        S->pTwiddleBReal, pDst, S->twidCoefRModifier);
+
+    /* Complex readix-4 IFFT process */
+    arm_radix4_butterfly_inverse_q31(pDst, S_CFFT->fftLen,
+                                     S_CFFT->pTwiddle,
+                                     S_CFFT->twidCoefModifier);
+    /* Bit reversal process */
+    if(S->bitReverseFlagR == 1u)
+    {
+      arm_bitreversal_q31(pDst, S_CFFT->fftLen,
+                          S_CFFT->bitRevFactor, S_CFFT->pBitRevTable);
+    }
+  }
+  else
+  {
+    /* Calculation of RFFT of input */
+
+    /* Complex readix-4 FFT process */
+    arm_radix4_butterfly_q31(pSrc, S_CFFT->fftLen,
+                             S_CFFT->pTwiddle, S_CFFT->twidCoefModifier);
+
+    /* Bit reversal process */
+    if(S->bitReverseFlagR == 1u)
+    {
+      arm_bitreversal_q31(pSrc, S_CFFT->fftLen,
+                          S_CFFT->bitRevFactor, S_CFFT->pBitRevTable);
+    }
+
+    /*  Real FFT core process */
+    arm_split_rfft_q31(pSrc, S->fftLenBy2, S->pTwiddleAReal,
+                       S->pTwiddleBReal, pDst, S->twidCoefRModifier);
+  }
+
+}
+
+
+  /**    
+   * @} end of RealFFT group    
+   */
+
+/**    
+ * @brief  Core Real FFT process    
+ * @param[in]   *pSrc 				points to the input buffer.    
+ * @param[in]   fftLen  			length of FFT.   
+ * @param[in]   *pATable 			points to the twiddle Coef A buffer.    
+ * @param[in]   *pBTable 			points to the twiddle Coef B buffer.    
+ * @param[out]  *pDst 				points to the output buffer.    
+ * @param[in]   modifier 	        twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.   
+ * @return none.    
+ */
+
+void arm_split_rfft_q31(
+  q31_t * pSrc,
+  uint32_t fftLen,
+  q31_t * pATable,
+  q31_t * pBTable,
+  q31_t * pDst,
+  uint32_t modifier)
+{
+  uint32_t i;                                    /* Loop Counter */
+  q31_t outR, outI;                              /* Temporary variables for output */
+  q31_t *pCoefA, *pCoefB;                        /* Temporary pointers for twiddle factors */
+  q31_t CoefA1, CoefA2, CoefB1;                  /* Temporary variables for twiddle coefficients */
+  q31_t *pOut1 = &pDst[2], *pOut2 = &pDst[(4u * fftLen) - 1u];
+  q31_t *pIn1 = &pSrc[2], *pIn2 = &pSrc[(2u * fftLen) - 1u];
+
+  /* Init coefficient pointers */
+  pCoefA = &pATable[modifier * 2u];
+  pCoefB = &pBTable[modifier * 2u];
+
+  i = fftLen - 1u;
+
+  while(i > 0u)
+  {
+    /*    
+       outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]    
+       + pSrc[2 * n - 2 * i] * pBTable[2 * i] +    
+       pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);    
+     */
+
+    /* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +    
+       pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -    
+       pIn[2 * n - 2 * i + 1] * pBTable[2 * i]); */
+
+    CoefA1 = *pCoefA++;
+    CoefA2 = *pCoefA;
+
+    /* outR = (pSrc[2 * i] * pATable[2 * i] */
+    outR = ((int32_t) (((q63_t) * pIn1 * CoefA1) >> 32));
+
+    /* outI = pIn[2 * i] * pATable[2 * i + 1] */
+    outI = ((int32_t) (((q63_t) * pIn1++ * CoefA2) >> 32));
+
+    /* - pSrc[2 * i + 1] * pATable[2 * i + 1] */
+    outR =
+      (q31_t) ((((q63_t) outR << 32) + ((q63_t) * pIn1 * (-CoefA2))) >> 32);
+
+    /* (pIn[2 * i + 1] * pATable[2 * i] */
+    outI =
+      (q31_t) ((((q63_t) outI << 32) + ((q63_t) * pIn1++ * (CoefA1))) >> 32);
+
+    /* pSrc[2 * n - 2 * i] * pBTable[2 * i]  */
+    outR =
+      (q31_t) ((((q63_t) outR << 32) + ((q63_t) * pIn2 * (-CoefA2))) >> 32);
+    CoefB1 = *pCoefB;
+
+    /* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] */
+    outI =
+      (q31_t) ((((q63_t) outI << 32) + ((q63_t) * pIn2-- * (-CoefB1))) >> 32);
+
+    /* pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1] */
+    outR =
+      (q31_t) ((((q63_t) outR << 32) + ((q63_t) * pIn2 * (CoefB1))) >> 32);
+
+    /* pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
+    outI =
+      (q31_t) ((((q63_t) outI << 32) + ((q63_t) * pIn2-- * (-CoefA2))) >> 32);
+
+    /* write output */
+    *pOut1++ = (outR << 1u);
+    *pOut1++ = (outI << 1u);
+
+    /* write complex conjugate output */
+    *pOut2-- = -(outI << 1u);
+    *pOut2-- = (outR << 1u);
+
+    /* update coefficient pointer */
+    pCoefB = pCoefB + (modifier * 2u);
+    pCoefA = pCoefA + ((modifier * 2u) - 1u);
+
+    i--;
+
+  }
+
+  pDst[2u * fftLen] = pSrc[0] - pSrc[1];
+  pDst[(2u * fftLen) + 1u] = 0;
+
+  pDst[0] = pSrc[0] + pSrc[1];
+  pDst[1] = 0;
+
+}
+
+
+/**    
+ * @brief  Core Real IFFT process    
+ * @param[in]   *pSrc 				points to the input buffer.   
+ * @param[in]   fftLen  			length of FFT.    
+ * @param[in]   *pATable 			points to the twiddle Coef A buffer.   
+ * @param[in]   *pBTable 			points to the twiddle Coef B buffer.    
+ * @param[out]  *pDst 				points to the output buffer.   
+ * @param[in]   modifier 	        twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.   
+ * @return none.    
+ */
+
+void arm_split_rifft_q31(
+  q31_t * pSrc,
+  uint32_t fftLen,
+  q31_t * pATable,
+  q31_t * pBTable,
+  q31_t * pDst,
+  uint32_t modifier)
+{
+  q31_t outR, outI;                              /* Temporary variables for output */
+  q31_t *pCoefA, *pCoefB;                        /* Temporary pointers for twiddle factors */
+  q31_t CoefA1, CoefA2, CoefB1;                  /* Temporary variables for twiddle coefficients */
+  q31_t *pIn1 = &pSrc[0], *pIn2 = &pSrc[(2u * fftLen) + 1u];
+
+  pCoefA = &pATable[0];
+  pCoefB = &pBTable[0];
+
+  while(fftLen > 0u)
+  {
+    /*    
+       outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +    
+       pIn[2 * n - 2 * i] * pBTable[2 * i] -    
+       pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);    
+
+       outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -    
+       pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -    
+       pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);    
+
+     */
+    CoefA1 = *pCoefA++;
+    CoefA2 = *pCoefA;
+
+    /* outR = (pIn[2 * i] * pATable[2 * i] */
+    outR = ((int32_t) (((q63_t) * pIn1 * CoefA1) >> 32));
+
+    /* - pIn[2 * i] * pATable[2 * i + 1] */
+    outI = -((int32_t) (((q63_t) * pIn1++ * CoefA2) >> 32));
+
+    /* pIn[2 * i + 1] * pATable[2 * i + 1] */
+    outR =
+      (q31_t) ((((q63_t) outR << 32) + ((q63_t) * pIn1 * (CoefA2))) >> 32);
+
+    /* pIn[2 * i + 1] * pATable[2 * i] */
+    outI =
+      (q31_t) ((((q63_t) outI << 32) + ((q63_t) * pIn1++ * (CoefA1))) >> 32);
+
+    /* pIn[2 * n - 2 * i] * pBTable[2 * i] */
+    outR =
+      (q31_t) ((((q63_t) outR << 32) + ((q63_t) * pIn2 * (CoefA2))) >> 32);
+
+    CoefB1 = *pCoefB;
+
+    /* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] */
+    outI =
+      (q31_t) ((((q63_t) outI << 32) - ((q63_t) * pIn2-- * (CoefB1))) >> 32);
+
+    /* pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1] */
+    outR =
+      (q31_t) ((((q63_t) outR << 32) + ((q63_t) * pIn2 * (CoefB1))) >> 32);
+
+    /* pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
+    outI =
+      (q31_t) ((((q63_t) outI << 32) + ((q63_t) * pIn2-- * (CoefA2))) >> 32);
+
+    /* write output */
+    *pDst++ = (outR << 1u);
+    *pDst++ = (outI << 1u);
+
+    /* update coefficient pointer */
+    pCoefB = pCoefB + (modifier * 2u);
+    pCoefA = pCoefA + ((modifier * 2u) - 1u);
+
+    /* Decrement loop count */
+    fftLen--;
+
+  }
+
+
+}