3535#endif
3636
3737static const double kPi = 3.14159265358979323846 ;
38+ static const int kDefaultTapsPerPhase = 16 ;
3839
3940template <typename T>
4041static inline T*
@@ -64,9 +65,37 @@ dsd_neo_sinc(double x) {
6465 return sin (kPi * x) / (kPi * x);
6566}
6667
68+ static inline float
69+ resamp_dot_contiguous (const float * DSD_NEO_RESTRICT samples, const float * DSD_NEO_RESTRICT taps, int len) {
70+ float acc = 0 .0f ;
71+ DSD_NEO_IVDEP
72+ for (int k = 0 ; k < len; k++) {
73+ acc += samples[k] * taps[k];
74+ }
75+ return acc;
76+ }
77+
78+ static inline float
79+ resamp_dot16_contiguous (const float * DSD_NEO_RESTRICT samples, const float * DSD_NEO_RESTRICT taps) {
80+ float acc0 = 0 .0f ;
81+ float acc1 = 0 .0f ;
82+ float acc2 = 0 .0f ;
83+ float acc3 = 0 .0f ;
84+
85+ DSD_NEO_IVDEP
86+ for (int k = 0 ; k < 16 ; k += 4 ) {
87+ acc0 += samples[k + 0 ] * taps[k + 0 ];
88+ acc1 += samples[k + 1 ] * taps[k + 1 ];
89+ acc2 += samples[k + 2 ] * taps[k + 2 ];
90+ acc3 += samples[k + 3 ] * taps[k + 3 ];
91+ }
92+
93+ return (acc0 + acc1) + (acc2 + acc3);
94+ }
95+
6796void
6897resamp_design (struct demod_state * s, int L, int M) {
69- int taps_per_phase = 16 ; /* K */
98+ int taps_per_phase = kDefaultTapsPerPhase ; /* K */
7099 if (taps_per_phase < 8 ) {
71100 taps_per_phase = 8 ;
72101 }
@@ -92,22 +121,22 @@ resamp_design(struct demod_state* s, int L, int M) {
92121 s->resamp_taps = (float *)mem_ptr;
93122 }
94123 {
95- void * mem_ptr = dsd_neo_aligned_malloc ((size_t )taps_per_phase * sizeof (float ));
124+ void * mem_ptr = dsd_neo_aligned_malloc ((size_t )taps_per_phase * 2U * sizeof (float ));
96125 s->resamp_hist = (float *)mem_ptr;
97126 }
98127 if (!s->resamp_taps || !s->resamp_hist ) {
99128 if (s->resamp_taps ) {
100- free (s->resamp_taps );
129+ dsd_neo_aligned_free (s->resamp_taps );
101130 s->resamp_taps = NULL ;
102131 }
103132 if (s->resamp_hist ) {
104- free (s->resamp_hist );
133+ dsd_neo_aligned_free (s->resamp_hist );
105134 s->resamp_hist = NULL ;
106135 }
107136 s->resamp_enabled = 0 ;
108137 return ;
109138 }
110- memset (s->resamp_hist , 0 , (size_t )taps_per_phase * sizeof (float ));
139+ memset (s->resamp_hist , 0 , (size_t )taps_per_phase * 2U * sizeof (float ));
111140 s->resamp_hist_head = 0 ;
112141
113142 double gain = 0.0 ;
@@ -122,12 +151,16 @@ resamp_design(struct demod_state* s, int L, int M) {
122151 }
123152
124153 const double phase_gain_comp = (double )L;
125- for (int n = 0 ; n < N; n++) {
126- int m = n - mid;
127- double w = 0.54 - 0.46 * cos (2.0 * kPi * (double )n / (double )(N - 1 ));
128- double h = 2.0 * fc * dsd_neo_sinc (2.0 * fc * (double )m);
129- double t = (h * w / gain) * phase_gain_comp;
130- s->resamp_taps [n] = (float )t;
154+ for (int phase = 0 ; phase < L; phase++) {
155+ float * phase_taps = s->resamp_taps + (size_t )phase * (size_t )taps_per_phase;
156+ for (int k = 0 ; k < taps_per_phase; k++) {
157+ int src_index = phase + ((taps_per_phase - 1 - k) * L);
158+ int m = src_index - mid;
159+ double w = 0.54 - 0.46 * cos (2.0 * kPi * (double )src_index / (double )(N - 1 ));
160+ double h = 2.0 * fc * dsd_neo_sinc (2.0 * fc * (double )m);
161+ double t = (h * w / gain) * phase_gain_comp;
162+ phase_taps[k] = (float )t;
163+ }
131164 }
132165
133166 s->resamp_L = L;
@@ -159,26 +192,17 @@ resamp_process_block(struct demod_state* s, const float* DSD_NEO_RESTRICT in, in
159192
160193 for (int n = 0 ; n < in_len; n++) {
161194 hist[head] = in_al[n];
195+ hist[head + K] = in_al[n];
162196 head++;
163197 if (head == K) {
164198 head = 0 ;
165199 }
166200 int local_phase = phase;
201+ const float * DSD_NEO_RESTRICT hist_window = hist + head;
167202 while (local_phase < L) {
168- float acc = 0 .0f ;
169- const float * DSD_NEO_RESTRICT tk = taps_al + local_phase;
170- int idx = head - 1 ;
171- if (idx < 0 ) {
172- idx += K;
173- }
174- for (int k = 0 ; k < K; k++) {
175- acc += hist[idx] * tk[0 ];
176- tk += L;
177- idx--;
178- if (idx < 0 ) {
179- idx += K;
180- }
181- }
203+ const float * DSD_NEO_RESTRICT phase_taps = taps_al + (size_t )local_phase * (size_t )K;
204+ float acc = (K == kDefaultTapsPerPhase ) ? resamp_dot16_contiguous (hist_window, phase_taps)
205+ : resamp_dot_contiguous (hist_window, phase_taps, K);
182206 out_al[out_len++] = acc;
183207 local_phase += M;
184208 }
0 commit comments