arm_cmplx_mult_cmplx_q31.c 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314
  1. /* ----------------------------------------------------------------------
  2. * Project: CMSIS DSP Library
  3. * Title: arm_cmplx_mult_cmplx_q31.c
  4. * Description: Q31 complex-by-complex multiplication
  5. *
  6. * $Date: 27. January 2017
  7. * $Revision: V.1.5.1
  8. *
  9. * Target Processor: Cortex-M cores
  10. * -------------------------------------------------------------------- */
  11. /*
  12. * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
  13. *
  14. * SPDX-License-Identifier: Apache-2.0
  15. *
  16. * Licensed under the Apache License, Version 2.0 (the License); you may
  17. * not use this file except in compliance with the License.
  18. * You may obtain a copy of the License at
  19. *
  20. * www.apache.org/licenses/LICENSE-2.0
  21. *
  22. * Unless required by applicable law or agreed to in writing, software
  23. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  24. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  25. * See the License for the specific language governing permissions and
  26. * limitations under the License.
  27. */
  28. #include "arm_math.h"
  29. /**
  30. * @ingroup groupCmplxMath
  31. */
  32. /**
  33. * @addtogroup CmplxByCmplxMult
  34. * @{
  35. */
  36. /**
  37. * @brief Q31 complex-by-complex multiplication
  38. * @param[in] *pSrcA points to the first input vector
  39. * @param[in] *pSrcB points to the second input vector
  40. * @param[out] *pDst points to the output vector
  41. * @param[in] numSamples number of complex samples in each vector
  42. * @return none.
  43. *
  44. * <b>Scaling and Overflow Behavior:</b>
  45. * \par
  46. * The function implements 1.31 by 1.31 multiplications and finally output is converted into 3.29 format.
  47. * Input down scaling is not required.
  48. */
  49. void arm_cmplx_mult_cmplx_q31(
  50. q31_t * pSrcA,
  51. q31_t * pSrcB,
  52. q31_t * pDst,
  53. uint32_t numSamples)
  54. {
  55. q31_t a, b, c, d; /* Temporary variables to store real and imaginary values */
  56. uint32_t blkCnt; /* loop counters */
  57. q31_t mul1, mul2, mul3, mul4;
  58. q31_t out1, out2;
  59. #if defined (ARM_MATH_DSP)
  60. /* Run the below code for Cortex-M4 and Cortex-M3 */
  61. /* loop Unrolling */
  62. blkCnt = numSamples >> 2U;
  63. /* First part of the processing with loop unrolling. Compute 4 outputs at a time.
  64. ** a second loop below computes the remaining 1 to 3 samples. */
  65. while (blkCnt > 0U)
  66. {
  67. /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
  68. /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
  69. a = *pSrcA++;
  70. b = *pSrcA++;
  71. c = *pSrcB++;
  72. d = *pSrcB++;
  73. mul1 = (q31_t) (((q63_t) a * c) >> 32);
  74. mul2 = (q31_t) (((q63_t) b * d) >> 32);
  75. mul3 = (q31_t) (((q63_t) a * d) >> 32);
  76. mul4 = (q31_t) (((q63_t) b * c) >> 32);
  77. mul1 = (mul1 >> 1);
  78. mul2 = (mul2 >> 1);
  79. mul3 = (mul3 >> 1);
  80. mul4 = (mul4 >> 1);
  81. out1 = mul1 - mul2;
  82. out2 = mul3 + mul4;
  83. /* store the real result in 3.29 format in the destination buffer. */
  84. *pDst++ = out1;
  85. /* store the imag result in 3.29 format in the destination buffer. */
  86. *pDst++ = out2;
  87. a = *pSrcA++;
  88. b = *pSrcA++;
  89. c = *pSrcB++;
  90. d = *pSrcB++;
  91. mul1 = (q31_t) (((q63_t) a * c) >> 32);
  92. mul2 = (q31_t) (((q63_t) b * d) >> 32);
  93. mul3 = (q31_t) (((q63_t) a * d) >> 32);
  94. mul4 = (q31_t) (((q63_t) b * c) >> 32);
  95. mul1 = (mul1 >> 1);
  96. mul2 = (mul2 >> 1);
  97. mul3 = (mul3 >> 1);
  98. mul4 = (mul4 >> 1);
  99. out1 = mul1 - mul2;
  100. out2 = mul3 + mul4;
  101. /* store the real result in 3.29 format in the destination buffer. */
  102. *pDst++ = out1;
  103. /* store the imag result in 3.29 format in the destination buffer. */
  104. *pDst++ = out2;
  105. a = *pSrcA++;
  106. b = *pSrcA++;
  107. c = *pSrcB++;
  108. d = *pSrcB++;
  109. mul1 = (q31_t) (((q63_t) a * c) >> 32);
  110. mul2 = (q31_t) (((q63_t) b * d) >> 32);
  111. mul3 = (q31_t) (((q63_t) a * d) >> 32);
  112. mul4 = (q31_t) (((q63_t) b * c) >> 32);
  113. mul1 = (mul1 >> 1);
  114. mul2 = (mul2 >> 1);
  115. mul3 = (mul3 >> 1);
  116. mul4 = (mul4 >> 1);
  117. out1 = mul1 - mul2;
  118. out2 = mul3 + mul4;
  119. /* store the real result in 3.29 format in the destination buffer. */
  120. *pDst++ = out1;
  121. /* store the imag result in 3.29 format in the destination buffer. */
  122. *pDst++ = out2;
  123. a = *pSrcA++;
  124. b = *pSrcA++;
  125. c = *pSrcB++;
  126. d = *pSrcB++;
  127. mul1 = (q31_t) (((q63_t) a * c) >> 32);
  128. mul2 = (q31_t) (((q63_t) b * d) >> 32);
  129. mul3 = (q31_t) (((q63_t) a * d) >> 32);
  130. mul4 = (q31_t) (((q63_t) b * c) >> 32);
  131. mul1 = (mul1 >> 1);
  132. mul2 = (mul2 >> 1);
  133. mul3 = (mul3 >> 1);
  134. mul4 = (mul4 >> 1);
  135. out1 = mul1 - mul2;
  136. out2 = mul3 + mul4;
  137. /* store the real result in 3.29 format in the destination buffer. */
  138. *pDst++ = out1;
  139. /* store the imag result in 3.29 format in the destination buffer. */
  140. *pDst++ = out2;
  141. /* Decrement the blockSize loop counter */
  142. blkCnt--;
  143. }
  144. /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
  145. ** No loop unrolling is used. */
  146. blkCnt = numSamples % 0x4U;
  147. while (blkCnt > 0U)
  148. {
  149. /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
  150. /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
  151. a = *pSrcA++;
  152. b = *pSrcA++;
  153. c = *pSrcB++;
  154. d = *pSrcB++;
  155. mul1 = (q31_t) (((q63_t) a * c) >> 32);
  156. mul2 = (q31_t) (((q63_t) b * d) >> 32);
  157. mul3 = (q31_t) (((q63_t) a * d) >> 32);
  158. mul4 = (q31_t) (((q63_t) b * c) >> 32);
  159. mul1 = (mul1 >> 1);
  160. mul2 = (mul2 >> 1);
  161. mul3 = (mul3 >> 1);
  162. mul4 = (mul4 >> 1);
  163. out1 = mul1 - mul2;
  164. out2 = mul3 + mul4;
  165. /* store the real result in 3.29 format in the destination buffer. */
  166. *pDst++ = out1;
  167. /* store the imag result in 3.29 format in the destination buffer. */
  168. *pDst++ = out2;
  169. /* Decrement the blockSize loop counter */
  170. blkCnt--;
  171. }
  172. #else
  173. /* Run the below code for Cortex-M0 */
  174. /* loop Unrolling */
  175. blkCnt = numSamples >> 1U;
  176. /* First part of the processing with loop unrolling. Compute 2 outputs at a time.
  177. ** a second loop below computes the remaining 1 sample. */
  178. while (blkCnt > 0U)
  179. {
  180. /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
  181. /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
  182. a = *pSrcA++;
  183. b = *pSrcA++;
  184. c = *pSrcB++;
  185. d = *pSrcB++;
  186. mul1 = (q31_t) (((q63_t) a * c) >> 32);
  187. mul2 = (q31_t) (((q63_t) b * d) >> 32);
  188. mul3 = (q31_t) (((q63_t) a * d) >> 32);
  189. mul4 = (q31_t) (((q63_t) b * c) >> 32);
  190. mul1 = (mul1 >> 1);
  191. mul2 = (mul2 >> 1);
  192. mul3 = (mul3 >> 1);
  193. mul4 = (mul4 >> 1);
  194. out1 = mul1 - mul2;
  195. out2 = mul3 + mul4;
  196. /* store the real result in 3.29 format in the destination buffer. */
  197. *pDst++ = out1;
  198. /* store the imag result in 3.29 format in the destination buffer. */
  199. *pDst++ = out2;
  200. a = *pSrcA++;
  201. b = *pSrcA++;
  202. c = *pSrcB++;
  203. d = *pSrcB++;
  204. mul1 = (q31_t) (((q63_t) a * c) >> 32);
  205. mul2 = (q31_t) (((q63_t) b * d) >> 32);
  206. mul3 = (q31_t) (((q63_t) a * d) >> 32);
  207. mul4 = (q31_t) (((q63_t) b * c) >> 32);
  208. mul1 = (mul1 >> 1);
  209. mul2 = (mul2 >> 1);
  210. mul3 = (mul3 >> 1);
  211. mul4 = (mul4 >> 1);
  212. out1 = mul1 - mul2;
  213. out2 = mul3 + mul4;
  214. /* store the real result in 3.29 format in the destination buffer. */
  215. *pDst++ = out1;
  216. /* store the imag result in 3.29 format in the destination buffer. */
  217. *pDst++ = out2;
  218. /* Decrement the blockSize loop counter */
  219. blkCnt--;
  220. }
  221. /* If the blockSize is not a multiple of 2, compute any remaining output samples here.
  222. ** No loop unrolling is used. */
  223. blkCnt = numSamples % 0x2U;
  224. while (blkCnt > 0U)
  225. {
  226. /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
  227. /* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
  228. a = *pSrcA++;
  229. b = *pSrcA++;
  230. c = *pSrcB++;
  231. d = *pSrcB++;
  232. mul1 = (q31_t) (((q63_t) a * c) >> 32);
  233. mul2 = (q31_t) (((q63_t) b * d) >> 32);
  234. mul3 = (q31_t) (((q63_t) a * d) >> 32);
  235. mul4 = (q31_t) (((q63_t) b * c) >> 32);
  236. mul1 = (mul1 >> 1);
  237. mul2 = (mul2 >> 1);
  238. mul3 = (mul3 >> 1);
  239. mul4 = (mul4 >> 1);
  240. out1 = mul1 - mul2;
  241. out2 = mul3 + mul4;
  242. /* store the real result in 3.29 format in the destination buffer. */
  243. *pDst++ = out1;
  244. /* store the imag result in 3.29 format in the destination buffer. */
  245. *pDst++ = out2;
  246. /* Decrement the blockSize loop counter */
  247. blkCnt--;
  248. }
  249. #endif /* #if defined (ARM_MATH_DSP) */
  250. }
  251. /**
  252. * @} end of CmplxByCmplxMult group
  253. */