Note: We no longer publish the latest version of our code here. We primarily use a kumc-bmi github organization. The heron ETL repository, in particular, is not public. Peers in the informatics community should see MultiSiteDev for details on requesting access.

source: webrtc/webrtc/modules/audio_processing/aecm/aecm_core_neon.S @ 0:4bda6873e34c

pub_scrub_3792 tip
Last change on this file since 0:4bda6873e34c was 0:4bda6873e34c, checked in by Michael Prittie <mprittie@…>, 6 years ago

Scrubbed password for publication.

File size: 5.9 KB
Line 
1@
2@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3@
4@ Use of this source code is governed by a BSD-style license
5@ that can be found in the LICENSE file in the root of the source
6@ tree. An additional intellectual property rights grant can be found
7@ in the file PATENTS.  All contributing project authors may
8@ be found in the AUTHORS file in the root of the source tree.
9@
10
11@ aecm_core_neon.S
12@ This file contains some functions in AECM, optimized for ARM Neon
13@ platforms. Reference C code is in file aecm_core.c. Bit-exact.
14
15#include "aecm_core_neon_offsets.h"
16#include "webrtc/modules/audio_processing/aecm/aecm_defines.h"
17#include "webrtc/system_wrappers/interface/asm_defines.h"
18
19GLOBAL_LABEL WebRtcAecm_kSqrtHanning
20GLOBAL_FUNCTION WebRtcAecm_CalcLinearEnergiesNeon
21GLOBAL_FUNCTION WebRtcAecm_StoreAdaptiveChannelNeon
22GLOBAL_FUNCTION WebRtcAecm_ResetAdaptiveChannelNeon
23
24@ void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore_t* aecm,
25@                                        const uint16_t* far_spectrum,
26@                                        int32_t* echo_est,
27@                                        uint32_t* far_energy,
28@                                        uint32_t* echo_energy_adapt,
29@                                        uint32_t* echo_energy_stored);
@
@ Processes PART_LEN / 8 vector iterations of 8 elements each, followed by one
@ scalar tail element. Over all processed indices i:
@   echo_est[i]         = channelStored[i] * far_spectrum[i]
@   *far_energy         = sum of far_spectrum[i]
@   *echo_energy_adapt  = sum of channelAdapt16[i] * far_spectrum[i]
@   *echo_energy_stored = sum of echo_est[i]
@ The three sums start from zero and are written with plain stores, so the
@ previous contents of the three output words are overwritten, not added to.
@
@ In:    r0 = aecm, r1 = far_spectrum, r2 = echo_est, r3 = far_energy;
@        echo_energy_adapt and echo_energy_stored are read from the stack
@        ([sp] and [sp, #4] on entry, i.e. [sp, #16] / [sp, #20] after the push).
@ Uses:  r0-r3, r12, q0-q2, q8-q14 and the flags; r4-r7 are saved and restored.
@ Align: the loads through the channelStored / channelAdapt16 pointers carry
@        a :128 qualifier and the echo_est store a :256 qualifier;
@        far_spectrum is loaded without an alignment qualifier.
@ NOTE(review): the vector loop multiplies channelStored with vmull.u16
@        (unsigned) while the scalar tail loads it with ldrsh (signed); the two
@        paths only agree when channelStored values are non-negative - confirm.
30.align 2
31DEFINE_FUNCTION WebRtcAecm_CalcLinearEnergiesNeon
32  push {r4-r7}                               @ 16 bytes pushed; stack args are now at [sp, #16] and [sp, #20].
33
34  vmov.i32 q14, #0                           @ q14 = partial sums of far_spectrum (far_energy).
35  vmov.i32 q8,  #0                           @ q8  = partial sums of echo_est (echo_energy_stored).
36  vmov.i32 q9,  #0                           @ q9  = partial sums of channelAdapt16 * far_spectrum.
37
38  movw r7, #offset_aecm_channelStored
39  movw r5, #offset_aecm_channelAdapt16
40
41  mov r4, r2                                 @ r4 = echo_est write pointer (r2 is reused later).
42  mov r12, #(PART_LEN / 8)                   @ Loop counter, unrolled by 8.
43  ldr r6, [r0, r7]                           @ r6 = pointer loaded from aecm + offset_aecm_channelStored
44  ldr r7, [r0, r5]                           @ r7 = pointer loaded from aecm + offset_aecm_channelAdapt16
45
46LOOP_CALC_LINEAR_ENERGIES:
47  vld1.16 {d26, d27}, [r1]!                  @ q13 = far_spectrum[i .. i+7]
48  vld1.16 {d24, d25}, [r6, :128]!            @ q12 = aecm->channelStored[i .. i+7]
49  vld1.16 {d0, d1}, [r7, :128]!              @ q0  = aecm->channelAdapt16[i .. i+7]
50  vaddw.u16 q14, q14, d26                    @ far_energy sums += far_spectrum[i .. i+3]
51  vmull.u16 q10, d26, d24                    @ q10 = echo_est[i .. i+3]
52  vmull.u16 q11, d27, d25                    @ q11 = echo_est[i+4 .. i+7]
53  vaddw.u16 q14, q14, d27                    @ far_energy sums += far_spectrum[i+4 .. i+7]
54  vmull.u16 q1, d26, d0                      @ q1  = channelAdapt16 * far_spectrum, elements i .. i+3
55  vst1.32 {q10, q11}, [r4, :256]!            @ store echo_est[i .. i+7]
56  vadd.u32 q8, q10                           @ echo_energy_stored sums += q10
57  vmull.u16 q2, d27, d1                      @ q2  = channelAdapt16 * far_spectrum, elements i+4 .. i+7
58  vadd.u32 q8, q11                           @ echo_energy_stored sums += q11
59  vadd.u32 q9, q1                            @ echo_energy_adapt sums += q1
60  subs r12, #1                               @ Sets the flags tested by bgt below.
61  vadd.u32 q9, q2                            @ echo_energy_adapt sums += q2
62  bgt LOOP_CALC_LINEAR_ENERGIES
63
64  vadd.u32 d28, d29                          @ Fold the four far_energy lanes down to two ...
65  vpadd.u32 d28, d28                         @ ... and then to one.
66  vmov.32 r12, d28[0]                        @ r12 = far_energy sum of the vector part
67  vadd.u32 d18, d19                          @ Same reduction for echo_energy_adapt.
68  vpadd.u32 d18, d18
69  vmov.32 r5, d18[0]                         @ r5 = echo_energy_adapt sum of the vector part
70  vadd.u32 d16, d17                          @ Same reduction for echo_energy_stored (moved to r2 below).
71  vpadd.u32 d16, d16
72
73  ldrh  r1, [r1]                             @ r1 = far_spectrum[i] for the tail element
74  add r12, r12, r1
75  str r12, [r3]                              @ *far_energy = vector sum + tail element
76  vmov.32 r2, d16[0]                         @ r2 = echo_energy_stored sum of the vector part
77
78  ldrsh r12, [r6]                            @ r12 = aecm->channelStored[i], sign-extended
79  ldrh  r6, [r7]                             @ r6 = aecm->channelAdapt16[i], zero-extended
80  mul r0, r12, r1                            @ r0 = channelStored[i] * far_spectrum[i]
81  mla r1, r6, r1, r5                         @ r1 = channelAdapt16[i] * far_spectrum[i] + r5
82  add r2, r2, r0                             @ r2 = echo_energy_stored total
83  str r0, [r4]                               @ echo_est[i] for the tail element
84  ldr r4, [sp, #20]                          @ r4 = echo_energy_stored (output pointer, from the stack)
85  str r2, [r4]
86  ldr r3, [sp, #16]                          @ r3 = echo_energy_adapt (output pointer, from the stack)
87  str r1, [r3]
88
89  pop {r4-r7}
90  bx  lr
91
92@ void WebRtcAecm_StoreAdaptiveChannelNeon(AecmCore_t* aecm,
93@                                          const uint16_t* far_spectrum,
94@                                          int32_t* echo_est);
@
@ Processes PART_LEN / 8 vector iterations of 8 elements each, followed by one
@ scalar tail element. For every processed index i:
@   channelStored[i] = channelAdapt16[i]
@   echo_est[i]      = channelAdapt16[i] * far_spectrum[i]
@
@ In:    r0 = aecm, r1 = far_spectrum, r2 = echo_est.
@ Uses:  r0-r3, r12, q10-q13 and the flags; nothing is pushed on the stack.
@ Align: accesses through the channelAdapt16 / channelStored pointers carry
@        a :128 qualifier and the echo_est store a :256 qualifier;
@        far_spectrum is loaded without an alignment qualifier.
@ NOTE(review): the vector loop multiplies with vmull.u16 (unsigned) while the
@        scalar tail loads channelAdapt16 with ldrsh (signed); the two paths
@        only agree when channelAdapt16 values are non-negative - confirm.
95.align 2
96DEFINE_FUNCTION WebRtcAecm_StoreAdaptiveChannelNeon
97  movw r3, #offset_aecm_channelAdapt16
98  movw r12, #offset_aecm_channelStored
99  ldr r3, [r0, r3]                           @ r3 = pointer loaded from aecm + offset_aecm_channelAdapt16
100  ldr r0, [r0, r12]                         @ r0 = pointer loaded from aecm + offset_aecm_channelStored (aecm no longer needed)
101  mov r12, #(PART_LEN / 8)                   @ Loop counter, unrolled by 8.
102
103LOOP_STORE_ADAPTIVE_CHANNEL:
104  vld1.16 {d24, d25}, [r3, :128]!            @ q12 = aecm->channelAdapt16[i .. i+7]
105  vld1.16 {d26, d27}, [r1]!                  @ q13 = far_spectrum[i .. i+7]
106  vst1.16 {d24, d25}, [r0, :128]!            @ aecm->channelStored[i .. i+7] = q12
107  vmull.u16 q10, d26, d24                   @ q10 = echo_est[i .. i+3]
108  vmull.u16 q11, d27, d25                   @ q11 = echo_est[i+4 .. i+7]
109  vst1.16 {q10, q11}, [r2, :256]!            @ store echo_est[i .. i+7]. NOTE(review): .16 element size on 32-bit products; CalcLinearEnergiesNeon uses vst1.32 for the same store - presumably equivalent on little-endian targets, confirm.
110  subs r12, #1
111  bgt LOOP_STORE_ADAPTIVE_CHANNEL
112
113  ldrsh  r12, [r3]                          @ r12 = channelAdapt16[i] for the tail element, sign-extended
114  strh  r12, [r0]                           @ channelStored[i] = channelAdapt16[i]
115  ldrh  r1, [r1]                            @ r1 = far_spectrum[i], zero-extended
116  mul r3, r1, r12                           @ r3 = far_spectrum[i] * channelAdapt16[i]
117  str r3, [r2]                              @ echo_est[i] for the tail element
118
119  bx  lr
120
121@ void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore_t* aecm);
@
@ Processes PART_LEN / 8 vector iterations of 8 elements each, followed by one
@ scalar tail element. For every processed index i:
@   channelAdapt16[i] = channelStored[i]
@   channelAdapt32[i] = channelStored[i] << 16
@
@ In:    r0 = aecm.
@ Uses:  r0-r3, q10-q12 and the flags; nothing is pushed on the stack.
@ Align: accesses through the channelStored / channelAdapt16 pointers carry
@        a :128 qualifier and the channelAdapt32 store a :256 qualifier.
122.align 2
123DEFINE_FUNCTION WebRtcAecm_ResetAdaptiveChannelNeon
124  movw r1, #offset_aecm_channelAdapt16
125  movw r2, #offset_aecm_channelAdapt32
126  movw r3, #offset_aecm_channelStored
127  ldr r1, [r0, r1]                           @ &aecm->channelAdapt16[0]
128  ldr r2, [r0, r2]                           @ &aecm->channelAdapt32[0]
129  ldr r0, [r0, r3]                           @ &aecm->channelStored[0] (aecm itself is no longer needed)
130  mov r3, #(PART_LEN / 8)                    @ Loop counter, unrolled by 8.
131
132LOOP_RESET_ADAPTIVE_CHANNEL:
133  vld1.16 {d24, d25}, [r0, :128]!           @ q12 = channelStored[i .. i+7]
134  subs r3, #1                               @ Sets the flags tested by bgt below.
135  vst1.16 {d24, d25}, [r1, :128]!           @ channelAdapt16[i .. i+7] = q12
136  vshll.s16 q10, d24, #16                   @ q10 = channelStored[i .. i+3] widened to 32 bits and shifted left by 16
137  vshll.s16 q11, d25, #16                   @ q11 = channelStored[i+4 .. i+7] widened to 32 bits and shifted left by 16
138  vst1.16 {q10, q11}, [r2, :256]!           @ channelAdapt32[i .. i+7] = q10, q11
139  bgt LOOP_RESET_ADAPTIVE_CHANNEL
140
141  ldrh  r0, [r0]                            @ r0 = channelStored[i] for the tail element, zero-extended
142  strh  r0, [r1]                            @ channelAdapt16[i] = channelStored[i]
143  mov r0, r0, asl #16                       @ The shift discards the upper half, so zero- vs sign-extension above makes no difference.
144  str r0, [r2]                              @ channelAdapt32[i] = channelStored[i] << 16
145
146  bx  lr
147
148@ Square root of Hanning window in Q14.
@ 65 16-bit entries: a leading 0 followed by 64 increasing values that end at
@ 16384 (= 2^14, i.e. 1.0 in Q14).
@ The table is reachable through two labels at the same address, one plain and
@ one with a leading underscore.
@ NOTE(review): the underscore alias is presumably for toolchains that prefix C
@ symbols with '_' - confirm against GLOBAL_LABEL in asm_defines.h.
149.align 4
150WebRtcAecm_kSqrtHanning:
151_WebRtcAecm_kSqrtHanning:
152  .short 0
153  .short 399, 798, 1196, 1594, 1990, 2386, 2780, 3172
154  .short 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224
155  .short 6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040
156  .short 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514
157  .short 11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553
158  .short 13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079
159  .short 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034
160  .short 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
161
162@ Square root of Hanning window in Q14. Compared to WebRtcAecm_kSqrtHanning,
163@ the order was reversed and one element (0) was removed.
@ 64 16-bit entries, running from 16384 down to 399.
@ NOTE(review): this label is not listed among the GLOBAL_LABEL / GLOBAL_FUNCTION
@ declarations at the top of the file and no instruction in this file refers to
@ it - confirm whether the table is still needed.
164.align 4
165kSqrtHanningReversed:
166  .short 16384, 16373, 16354, 16325, 16286, 16237, 16179, 16111, 16034, 15947
167  .short 15851, 15746, 15631, 15506, 15373, 15231, 15079, 14918, 14749, 14571
168  .short 14384, 14189, 13985, 13773, 13553, 13325, 13089, 12845, 12594, 12335
169  .short 12068, 11795, 11514, 11227, 10933, 10633, 10326, 10013, 9695, 9370
170  .short 9040, 8705, 8364, 8019, 7668, 7313, 6954, 6591, 6224, 5853, 5478, 5101
171  .short 4720, 4337, 3951, 3562, 3172, 2780, 2386, 1990, 1594, 1196, 798, 399
Note: See TracBrowser for help on using the repository browser.