arm_relu_q7.c 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110
  1. /*
  2. * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
  3. *
  4. * SPDX-License-Identifier: Apache-2.0
  5. *
  6. * Licensed under the Apache License, Version 2.0 (the License); you may
  7. * not use this file except in compliance with the License.
  8. * You may obtain a copy of the License at
  9. *
  10. * www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an AS IS BASIS, WITHOUT
  14. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. /* ----------------------------------------------------------------------
  19. * Project: CMSIS NN Library
  20. * Title: arm_relu_q7.c
  21. * Description: Q7 version of ReLU
  22. *
  23. * $Date: 17. January 2018
  24. * $Revision: V.1.0.0
  25. *
  26. * Target Processor: Cortex-M cores
  27. *
  28. * -------------------------------------------------------------------- */
  #include <string.h>
  #include "arm_math.h"
  #include "arm_nnfunctions.h"
  31. /**
  32. * @ingroup groupNN
  33. */
  34. /**
  35. * @addtogroup Acti
  36. * @{
  37. */
  38. /**
  39. * @brief Q7 RELU function
  40. * @param[in,out] data pointer to input
  41. * @param[in] size number of elements
  42. * @return none.
  43. *
  44. * @details
  45. *
  46. * Optimized relu with QSUB instructions.
  47. *
  48. */
  49. void arm_relu_q7(q7_t * data, uint16_t size)
  50. {
  51. #if defined (ARM_MATH_DSP)
  52. /* Run the following code for Cortex-M4 and Cortex-M7 */
  53. uint16_t i = size >> 2;
  54. q7_t *pIn = data;
  55. q7_t *pOut = data;
  56. q31_t in;
  57. q31_t buf;
  58. q31_t mask;
  59. while (i)
  60. {
  61. in = *__SIMD32(pIn)++;
  62. /* extract the first bit */
  63. buf = __ROR(in & 0x80808080, 7);
  64. /* if MSB=1, mask will be 0xFF, 0x0 otherwise */
  65. mask = __QSUB8(0x00000000, buf);
  66. *__SIMD32(pOut)++ = in & (~mask);
  67. i--;
  68. }
  69. i = size & 0x3;
  70. while (i)
  71. {
  72. if (*pIn < 0)
  73. {
  74. *pIn = 0;
  75. }
  76. pIn++;
  77. i--;
  78. }
  79. #else
  80. /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
  81. uint16_t i;
  82. for (i = 0; i < size; i++)
  83. {
  84. if (data[i] < 0)
  85. data[i] = 0;
  86. }
  87. #endif /* ARM_MATH_DSP */
  88. }
  89. /**
  90. * @} end of Acti group
  91. */