@@ -33,9 +33,12 @@ typedef double xreal;
3333#define FFTW_MALLOC fftw_malloc
3434#define FFTW_FREE fftw_free
3535#define FFTW_PLAN_DFT_1D fftw_plan_dft_1d
36+ #define FFTW_PLAN_DFT_2D fftw_plan_dft_2d
3637#define FFTW_EXECUTE fftw_execute
3738#define FFTW_DESTROY_PLAN fftw_destroy_plan
39+ #define FFTW_CLEANUP fftw_cleanup
3840#define SLEEFDFT_INIT1D SleefDFT_double_init1d
41+ #define SLEEFDFT_INIT2D SleefDFT_double_init2d
3942#elif BASETYPEID == 2
4043typedef float xreal;
4144#define FFTW_COMPLEX fftwf_complex
@@ -44,9 +47,12 @@ typedef float xreal;
4447#define FFTW_MALLOC fftwf_malloc
4548#define FFTW_FREE fftwf_free
4649#define FFTW_PLAN_DFT_1D fftwf_plan_dft_1d
50+ #define FFTW_PLAN_DFT_2D fftwf_plan_dft_2d
4751#define FFTW_EXECUTE fftwf_execute
4852#define FFTW_DESTROY_PLAN fftwf_destroy_plan
53+ #define FFTW_CLEANUP fftwf_cleanup
4954#define SLEEFDFT_INIT1D SleefDFT_float_init1d
55+ #define SLEEFDFT_INIT2D SleefDFT_float_init2d
5056#else
5157#error BASETYPEID not set
5258#endif
@@ -75,31 +81,44 @@ class FFTFramework {
7581 niter *= 2 ;
7682 }
7783
78- return ( double )niter * ns / (t1 - t0);
84+ return 1 + int64_t (( double )niter * ns / (t1 - t0) );
7985 }
8086};
8187
8288template <typename cplx>
8389class FWSleefDFT : public FFTFramework <cplx> {
84- const int n;
90+ const int n, m ;
8591 cplx* in;
8692 cplx* out;
8793 SleefDFT *plan;
8894
8995public:
90- FWSleefDFT (int n_, bool forward, bool mt= false ) : n(n_) {
96+ FWSleefDFT (int n_, int m_, bool forward, bool mt, bool check ) : n(n_), m(m_ ) {
9197 SleefDFT_setDefaultVerboseFP (stderr);
9298 SleefDFT_setPlanFilePath (NULL , NULL , SLEEF_PLAN_RESET);
93- in = (cplx*)Sleef_malloc (sizeof (cplx) * n);
94- out = (cplx*)Sleef_malloc (sizeof (cplx) * n);
95- plan = SLEEFDFT_INIT1D (n, (xreal*)in, (xreal*)out,
96- (forward ? SLEEF_MODE_FORWARD : SLEEF_MODE_BACKWARD) | SLEEF_MODE_MEASURE /* | SLEEF_MODE_VERBOSE*/ | (mt ? 0 : SLEEF_MODE_NO_MT));
97- vector<char > pathstr (1024 );
98- SleefDFT_getPath (plan, pathstr.data (), pathstr.size ());
99- cerr << pathstr.data () << endl;
99+ in = (cplx*)Sleef_malloc (sizeof (cplx) * n * m);
100+ out = (cplx*)Sleef_malloc (sizeof (cplx) * n * m);
101+
102+ uint64_t mode = check ? SLEEF_MODE_ESTIMATE : SLEEF_MODE_MEASURE;
103+ mode |= forward ? SLEEF_MODE_FORWARD : SLEEF_MODE_BACKWARD;
104+ mode |= mt ? 0 : SLEEF_MODE_NO_MT;
105+ // mode |= SLEEF_MODE_VERBOSE;
106+
107+ if (m == 1 ) {
108+ plan = SLEEFDFT_INIT1D (n, (xreal*)in, (xreal*)out, mode);
109+ vector<char > pathstr (1024 );
110+ SleefDFT_getPath (plan, pathstr.data (), pathstr.size ());
111+ cerr << pathstr.data () << endl;
112+ } else {
113+ plan = SLEEFDFT_INIT2D (n, m, (xreal*)in, (xreal*)out, mode);
114+ }
100115 }
101116
102- ~FWSleefDFT () { SleefDFT_dispose (plan); }
117+ ~FWSleefDFT () {
118+ SleefDFT_dispose (plan);
119+ Sleef_free (out);
120+ Sleef_free (in);
121+ }
103122
104123 cplx* getInPtr () { return in ; }
105124 cplx* getOutPtr () { return out; }
@@ -109,18 +128,23 @@ class FWSleefDFT : public FFTFramework<cplx> {
109128
110129template <typename cplx>
111130class FWFFTW3 : public FFTFramework <cplx> {
112- const int n;
131+ const int n, m ;
113132 cplx* in;
114133 cplx* out;
115134 FFTW_PLAN plan;
116135
117136public:
118- FWFFTW3 (int n_, bool forward, bool mt=false ) : n(n_) {
137+ FWFFTW3 (int n_, int m_, bool forward, bool mt, bool check) : n(n_), m(m_) {
138+ // FFTW_CLEANUP();
119139 FFTW_PLAN_WITH_NTHREADS (mt ? omp_get_max_threads () : 1 );
120- in = (cplx*)FFTW_MALLOC (sizeof (FFTW_COMPLEX) * n);
121- out = (cplx*)FFTW_MALLOC (sizeof (FFTW_COMPLEX) * n);
122- plan = FFTW_PLAN_DFT_1D (n, (FFTW_COMPLEX*)in, (FFTW_COMPLEX*)out,
123- forward ? FFTW_FORWARD : FFTW_BACKWARD, FFTW_MEASURE);
140+ in = (cplx*)FFTW_MALLOC (sizeof (FFTW_COMPLEX) * n * m);
141+ out = (cplx*)FFTW_MALLOC (sizeof (FFTW_COMPLEX) * n * m);
142+ unsigned flags = check ? FFTW_ESTIMATE : FFTW_MEASURE;
143+ if (m == 1 ) {
144+ plan = FFTW_PLAN_DFT_1D (n, (FFTW_COMPLEX*)in, (FFTW_COMPLEX*)out, forward ? FFTW_FORWARD : FFTW_BACKWARD, flags);
145+ } else {
146+ plan = FFTW_PLAN_DFT_2D (n, m, (FFTW_COMPLEX*)in, (FFTW_COMPLEX*)out, forward ? FFTW_FORWARD : FFTW_BACKWARD, flags);
147+ }
124148 }
125149
126150 ~FWFFTW3 () {
@@ -137,78 +161,83 @@ class FWFFTW3 : public FFTFramework<cplx> {
137161
138162int main (int argc, char **argv) {
139163 if (argc == 1 ) {
140- fprintf (stderr, " %s <log2n> <plan file name > <plan string >\n " , argv[0 ]);
164+ fprintf (stderr, " %s <log2n> <log2m> <measurement time in ms > <nrepeat >\n " , argv[0 ]);
141165 exit (-1 );
142166 }
143167
144168 fftw_init_threads ();
145169
146170 double measureTimeMillis = 3000 ;
171+ if (argc >= 4 ) measureTimeMillis = atof (argv[3 ]);
147172
148- int backward = 0 ;
173+ bool forward = true ;
149174
150175 int log2n = atoi (argv[1 ]);
151176 if (log2n < 0 ) {
152- backward = 1 ;
177+ forward = false ;
153178 log2n = -log2n;
154179 }
155180
156181 const int n = 1 << log2n;
157- const int nrepeat = 8 ;
182+
183+ const int log2m = argc >= 3 ? atoi (argv[2 ]) : 0 ;
184+ const int m = 1 << log2m;
185+
186+ cerr << " n = " << n << " , m = " << m << " , " << (forward ? " forward" : " backward" ) << endl;
187+
188+ const int nrepeat = argc >= 5 ? atoi (argv[4 ]) : 4 ;
158189
159190 vector<double > mflops_sleefdftst, mflops_fftwst, mflops_sleefdftmt, mflops_fftwmt;
160191
161- #ifdef CHECK
192+ vector<complex <xreal>> v (n * m);
193+ for (int i=0 ;i<n * m;i++) {
194+ v[i] = (2.0 * (rand () / (double )RAND_MAX) - 1 ) + (2.0 * (rand () / (double )RAND_MAX) - 1 ) * 1i;
195+ }
196+
162197 {
163- // Test if we are really computing the same values
198+ // Check if we are really computing the same values
164199
165- auto sleefdft = make_shared<FWSleefDFT<complex <xreal>>>(n, true );
166- auto fftw = make_shared<FWFFTW3 <complex <xreal>>>(n, true );
200+ auto sleefdft = make_shared<FWSleefDFT<complex <xreal>>>(n, m, forward, false , true );
201+ auto fftw = make_shared<FWFFTW3 <complex <xreal>>>(n, m, forward, false , true );
167202
168203 complex <xreal> *in0 = sleefdft->getInPtr ();
169204 complex <xreal> *out0 = sleefdft->getOutPtr ();
170205 complex <xreal> *in1 = fftw->getInPtr ();
171206 complex <xreal> *out1 = fftw->getOutPtr ();
172207
173- for (int i=0 ;i<n;i++) {
174- in0[i] = in1[i] = (2.0 * (rand () / (double )RAND_MAX) - 1 ) + (2.0 * (rand () / (double )RAND_MAX) - 1 ) * 1i;
175- }
208+ for (int i=0 ;i<n * m;i++) in0[i] = in1[i] = v[i];
176209
177210 sleefdft->execute ();
178211 fftw ->execute ();
179212
180- for (int i=0 ;i<n;i++) {
213+ for (int i=0 ;i<n * m ;i++) {
181214 if (std::real (abs ((out0[i] - out1[i]) * (out0[i] - out1[i]))) > 0.1 ) {
182215 cerr << " NG " << i << " : " << out0[i] << " , " << out1[i] << endl;
183216 exit (-1 );
184217 }
185218 }
219+
220+ cerr << " Check OK" << endl;
186221 }
187- #endif
188222
189223 for (int nr = 0 ;nr < nrepeat;nr++) {
190- cerr << endl << " n = " << n << " (" << log2n << " ), nr = " << nr << endl;
191-
192- cerr << " Planning SleefDFT ST" << endl;
193- auto sleefdftst = make_shared<FWSleefDFT<complex <xreal>>>(n, true , false );
194- cerr << " Planning FFTW ST" << endl;
195- auto fftwst = make_shared<FWFFTW3 <complex <xreal>>>(n, true , false );
196- cerr << " Planning SleefDFT MT" << endl;
197- auto sleefdftmt = make_shared<FWSleefDFT<complex <xreal>>>(n, true , true );
198- cerr << " Planning FFTW MT" << endl;
199- auto fftwmt = make_shared<FWFFTW3 <complex <xreal>>>(n, true , true );
200- cerr << " Planning done" << endl;
201-
202- complex <xreal> *in0 = sleefdftst->getInPtr ();
203- complex <xreal> *in1 = fftwst->getInPtr ();
204-
205- for (int i=0 ;i<n;i++) {
206- in0[i] = in1[i] = (2.0 * (rand () / (double )RAND_MAX) - 1 ) + (2.0 * (rand () / (double )RAND_MAX) - 1 ) * 1i;
207- }
224+ cerr << endl;
225+ #if BASETYPEID == 1
226+ cerr << " DP " ;
227+ #elif BASETYPEID == 2
228+ cerr << " SP " ;
229+ #endif
230+ cerr << " n = 2^" << log2n << " = " << n << " , m = 2^" << log2m << " = " << m << " , nr = " << nr << endl;
208231
209232 //
210233
211234 {
235+ cerr << " Planning SleefDFT ST" << endl;
236+ auto sleefdftst = make_shared<FWSleefDFT<complex <xreal>>>(n, m, forward, false , false );
237+
238+ complex <xreal> *in0 = sleefdftst->getInPtr ();
239+ for (int i=0 ;i<n * m;i++) in0[i] = v[i];
240+
212241 auto niter = sleefdftst->niter (1000LL * 1000 * measureTimeMillis);
213242
214243 cerr << " SleefDFT ST niter = " << niter << endl;
@@ -220,6 +249,7 @@ int main(int argc, char **argv) {
220249 int64_t tm1 = timens ();
221250
222251 double mflops = 5 * n * log2n / ((tm1 - tm0) / (double (niter)*1000 ));
252+ if (m != 1 ) mflops *= m * log2m;
223253
224254 fprintf (stderr, " %g Mflops\n " , mflops);
225255
@@ -229,6 +259,12 @@ int main(int argc, char **argv) {
229259 //
230260
231261 {
262+ cerr << " Planning FFTW ST" << endl;
263+ auto fftwst = make_shared<FWFFTW3<complex <xreal>>>(n, m, forward, false , false );
264+
265+ complex <xreal> *in0 = fftwst->getInPtr ();
266+ for (int i=0 ;i<n * m;i++) in0[i] = v[i];
267+
232268 auto niter = fftwst->niter (1000LL * 1000 * measureTimeMillis);
233269
234270 cerr << " FFTW ST niter = " << niter << endl;
@@ -240,6 +276,7 @@ int main(int argc, char **argv) {
240276 int64_t tm1 = timens ();
241277
242278 double mflops = 5 * n * log2n / ((tm1 - tm0) / (double (niter)*1000 ));
279+ if (m != 1 ) mflops *= m * log2m;
243280
244281 fprintf (stderr, " %g Mflops\n " , mflops);
245282
@@ -249,6 +286,12 @@ int main(int argc, char **argv) {
249286 //
250287
251288 {
289+ cerr << " Planning SleefDFT MT" << endl;
290+ auto sleefdftmt = make_shared<FWSleefDFT<complex <xreal>>>(n, m, forward, true , false );
291+
292+ complex <xreal> *in0 = sleefdftmt->getInPtr ();
293+ for (int i=0 ;i<n * m;i++) in0[i] = v[i];
294+
252295 auto niter = sleefdftmt->niter (1000LL * 1000 * measureTimeMillis);
253296
254297 cerr << " SleefDFT MT niter = " << niter << endl;
@@ -260,6 +303,7 @@ int main(int argc, char **argv) {
260303 int64_t tm1 = timens ();
261304
262305 double mflops = 5 * n * log2n / ((tm1 - tm0) / (double (niter)*1000 ));
306+ if (m != 1 ) mflops *= m * log2m;
263307
264308 fprintf (stderr, " %g Mflops\n " , mflops);
265309
@@ -269,6 +313,12 @@ int main(int argc, char **argv) {
269313 //
270314
271315 {
316+ cerr << " Planning FFTW MT" << endl;
317+ auto fftwmt = make_shared<FWFFTW3<complex <xreal>>>(n, m, forward, true , false );
318+
319+ complex <xreal> *in0 = fftwmt->getInPtr ();
320+ for (int i=0 ;i<n * m;i++) in0[i] = v[i];
321+
272322 auto niter = fftwmt->niter (1000LL * 1000 * measureTimeMillis);
273323
274324 cerr << " FFTW MT niter = " << niter << endl;
@@ -280,14 +330,15 @@ int main(int argc, char **argv) {
280330 int64_t tm1 = timens ();
281331
282332 double mflops = 5 * n * log2n / ((tm1 - tm0) / (double (niter)*1000 ));
333+ if (m != 1 ) mflops *= m * log2m;
283334
284335 fprintf (stderr, " %g Mflops\n " , mflops);
285336
286337 mflops_fftwmt.push_back (mflops);
287338 }
288339 }
289340
290- cout << log2n << " , " ;
341+ cout << log2n << " , " << log2m << " , " ;
291342
292343 {
293344 double f = 0 ;
0 commit comments