arm_relu_q15.c 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. /*
  2. * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
  3. *
  4. * SPDX-License-Identifier: Apache-2.0
  5. *
  6. * Licensed under the Apache License, Version 2.0 (the License); you may
  7. * not use this file except in compliance with the License.
  8. * You may obtain a copy of the License at
  9. *
  10. * www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  14. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. /* ----------------------------------------------------------------------
  19. * Project: CMSIS NN Library
  20. * Title: arm_relu_q15.c
  21. * Description: Q15 version of ReLU
  22. *
  23. * $Date: 17. January 2018
  24. * $Revision: V.1.0.0
  25. *
  26. * Target Processor: Cortex-M cores
  27. *
  28. * -------------------------------------------------------------------- */
  29. #include "arm_math.h"
  30. #include "arm_nnfunctions.h"
  31. /**
  32. * @ingroup groupNN
  33. */
  34. /**
  35. * @addtogroup Acti
  36. * @{
  37. */
  38. /**
  39. * @brief Q15 RELU function
  40. * @param[in,out] data pointer to input
  41. * @param[in] size number of elements
  42. * @return none.
  43. *
  44. * @details
  45. *
  46. * Optimized relu with QSUB instructions.
  47. *
  48. */
  49. void arm_relu_q15(q15_t * data, uint16_t size)
  50. {
  51. #if defined (ARM_MATH_DSP)
  52. /* Run the following code for Cortex-M4 and Cortex-M7 */
  53. uint16_t i = size >> 1;
  54. q15_t *pIn = data;
  55. q15_t *pOut = data;
  56. q31_t in;
  57. q31_t buf;
  58. q31_t mask;
  59. while (i)
  60. {
  61. in = *__SIMD32(pIn)++;
  62. /* extract the first bit */
  63. buf = __ROR(in & 0x80008000, 15);
  64. /* if MSB=1, mask will be 0xFF, 0x0 otherwise */
  65. mask = __QSUB16(0x00000000, buf);
  66. *__SIMD32(pOut)++ = in & (~mask);
  67. i--;
  68. }
  69. if (size & 0x1)
  70. {
  71. if (*pIn < 0)
  72. {
  73. *pIn = 0;
  74. }
  75. pIn++;
  76. }
  77. #else
  78. /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
  79. uint16_t i;
  80. for (i = 0; i < size; i++)
  81. {
  82. if (data[i] < 0)
  83. data[i] = 0;
  84. }
  85. #endif /* ARM_MATH_DSP */
  86. }
  87. /**
  88. * @} end of Acti group
  89. */