SAF
saf_externals.h
Go to the documentation of this file.
1 /*
2  * Copyright 2020 Leo McCormack
3  *
4  * Permission to use, copy, modify, and/or distribute this software for any
5  * purpose with or without fee is hereby granted, provided that the above
6  * copyright notice and this permission notice appear in all copies.
7  *
8  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
9  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
10  * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
11  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
12  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
13  * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
14  * PERFORMANCE OF THIS SOFTWARE.
15  */
16 
66 #ifndef __SAF_EXTERNALS_H_INCLUDED__
67 #define __SAF_EXTERNALS_H_INCLUDED__
68 
69 /* ========================================================================== */
70 /* Performance Library to Employ */
71 /* ========================================================================== */
72 
73 /* Assert that only one CBLAS/LAPACK performance library has been specified */
74 #if (defined(SAF_USE_INTEL_MKL_LP64) + \
75  defined(SAF_USE_INTEL_MKL_ILP64) + \
76  defined(SAF_USE_OPEN_BLAS_AND_LAPACKE) + \
77  defined(SAF_USE_ATLAS) + \
78  defined(SAF_USE_APPLE_ACCELERATE)) > 1
79 # error Only one performance library flag can be defined!
80 #endif
81 
82 /*
83  * Due to the nature of spatial/multi-channel audio signal processing, SAF is
84  * required to employ heavy use of linear algebra operations. Therefore, the
85  * framework has been written from the ground up to conform to the CBLAS and
86  * LAPACK standards, of which there are a number of highly optimised
87  * performance libraries available:
88  */
89 #if defined(SAF_USE_INTEL_MKL_LP64)
90 /*
91  * Using Intel's Math Kernel Library (MKL) LP64 configuration (32-bit int)
92  * (Generally the fastest library for x86 based architectures)
93  *
94  * Note that Intel MKL not only supports CBLAS and LAPACK, but also offers:
95  * - a highly optimised discrete/fast Fourier transform (DFT/FFT), which is
96  * used by the saf_utility_fft wrapper.
97  * - a number of additional vector, vector-vector, vector-scalar operations
98  * that are not covered by the CBLAS standard; such as: hadamard products,
99  * element-wise additions/subtractions, and the modulus or reciprocal of
100  * all vector elements, etc.
101  */
102 # include "mkl.h"
103 
104 #elif defined(SAF_USE_INTEL_MKL_ILP64)
105 /*
106  * Using Intel's Math Kernel Library (MKL) ILP64 configuration (64-bit int)
107  * (Generally the fastest library for x86 based architectures)
108  *
109  * This 64-bit int version is the one employed by e.g. MATLAB. Therefore, it is
110  * required if you wish to build MEX objects using SAF code; see e.g.
111  * extras/safmex. In general, the performance of this option is practically the
112  * same as "SAF_USE_INTEL_MKL_LP64", but it is slower in some very rare special
113  * cases. Therefore, "SAF_USE_INTEL_MKL_LP64" is still the favoured option if
114  * you are not planning on building MEX objects using SAF.
115  */
116 # define MKL_ILP64
117 # include "mkl.h"
118 
119 #elif defined(SAF_USE_OPEN_BLAS_AND_LAPACKE)
120 /*
121  * Using OpenBLAS and the LAPACKE interface
122  * (A decent option for both x86 and ARM based architectures)
123  *
124  * This option provides implementations of the CBLAS/LAPACK functions which have
125  * decent performance. However, unlike Intel MKL or Apple Accelerate, it does
126  * not offer an optimised DFT/FFT or any other linear algebra functions outside
127  * of these standards. Therefore, consider also using Intel's IPP library or
128  * FFTW for the DFT/FFT with: "SAF_USE_INTEL_IPP" or "SAF_USE_FFTW"
129  *
130  * Note that "SAF_USE_INTEL_IPP" also offers support for certain linear algebra
131  * operations not covered by the CBLAS/LAPACK standards, which SAF can leverage.
132  *
133  * Alternatively, SSE/AVX/AVX-512 fallback implementations for certain linear
134  * algebra operations may be enabled with: "SAF_ENABLE_SIMD"
135  *
136  * More information regarding these additional options can be found below.
137  */
138 # include "cblas.h"
139 # include "lapacke.h"
140 
141 #elif defined(SAF_USE_ATLAS)
142 /*
143  * Using the Automatically Tuned Linear Algebra Software (ATLAS) library
144  * (Not recommended, since some saf_utility_veclib functions do not work with
145  * ATLAS)
146  *
147  * Basically, do not use this unless you have to, and if you do, be aware that
148  * some linear algebra functions in saf_utility_veclib will exit the program if
149  * they are called.
150  */
151 # include "cblas-atlas.h"
152 # include "clapack.h"
153 # warning Note: CLAPACK does not include all LAPACK routines!
154 
155 #elif defined(__APPLE__) && defined(SAF_USE_APPLE_ACCELERATE)
156 /*
157  * Using Apple's Accelerate library
158  * (Solid choice for both x86 and ARM, but only works under MacOSX and is not
159  * quite as fast as Intel MKL for x86 systems)
160  *
161  * Note that Apple Accelerate not only supports CBLAS and LAPACK, but also
162  * offers:
163  * - an optimised discrete/fast Fourier transform (DFT/FFT), which is used by
164  * the saf_utility_fft wrapper.
165  * - a number of additional vector, vector-vector, vector-scalar operations
166  * that are not covered by the CBLAS standard; such as hadamard products,
167  * element-wise additions/subtractions, etc.
168  *
169  * Unlike e.g. Intel MKL's DFT, not all even number DFT lengths are supported by
170  * vDSP. Therefore, be aware that the default kissFFT library (included in
171  * framework/resources) is still used as a fall-back option in such cases.
172  */
173 # include "Accelerate/Accelerate.h"
174 
175 #else
176 /*
177  * If you would like to use some other CBLAS/LAPACK supporting library then
178  * please get in touch! :-)
179  */
180 # error SAF requires a library (or libraries) which supports CBLAS and LAPACK
181 #endif
182 
183 
184 /* ========================================================================== */
185 /* Optional External Libraries */
186 /* ========================================================================== */
187 
188 #if defined(SAF_USE_INTEL_IPP)
189 /*
190  * The use of Intel's Integrated Performance Primitives (IPP) is optional, but
191  * does lead to improvements in the following:
192  * - slightly faster DFT/FFT (for saf_utility_fft) compared with the
193  * implementation found in Intel MKL, which are both faster than the DFT/FFT
194  * implementations found in Apple Accelerate vDSP and FFTW.
195  * - this overrides the included resources/speex_resampler with the IPP
196  * resampler, which is marginally faster and more accurate.
197  * - this also overrides certain vector-vector, and vector-scalar operations,
198  * such as element-wise multiplication, addition, scaling etc.
199  *
200  * Note that the IPP DFT/FFT is overridden by FFTW if SAF_USE_FFTW is defined.
201  */
202 # include "ipp.h"
203 #endif
204 
205 #if defined(SAF_USE_FFTW)
206 /*
207  * The use of FFTW is optional, but it is faster than the default kissFFT
208  * DFT/FFT implementation. However, if you are on an x86 CPU then the DFT/FFT
209  * implementations found in Intel IPP, Intel MKL and Apple Accelerate are
210  * usually faster options.
211  *
212  * Note, SAF uses the single-precision version (fftw3f), which is built with:
213  * $ ./configure --enable-float
214  * $ make
215  *
216  * If SAF_USE_FFTW is defined, then FFTW overrides all of the other available
217  * DFT/FFT implementations in the saf_utility_fft wrapper.
218  */
219 # include "fftw3.h"
220 #endif
221 
222 #if defined(SAF_ENABLE_SIMD)
223 /*
224  * SAF heavily favours the use of optimised linear algebra routines provided by
225  * e.g. Intel MKL or Accelerate, since they optimally employ vectorisation
226  * (with SSE/AVX etc.). However, in cases where the employed performance library
227  * does not offer an implementation for a particular routine, SAF provides fall-
228  * back option(s).
229  * SIMD accelerated fall-back options may be enabled with: SAF_ENABLE_SIMD
230  *
231  * By default SSE, SSE2, and SSE3 intrinsics are employed, unless one of the
232  * following compiler flags is given:
233  * - AVX/AVX2 intrinsics are enabled with: -mavx2
234  * - AVX-512 intrinsics are enabled with: -mavx512f
235  *
236  * Note that intrinsics require a CPU that supports them (x86_64 architecture).
237  * To find out which SIMD intrinsics are supported by your own CPU, use the
238  * following terminal command on macOS: $ sysctl -a | grep machdep.cpu.features
239  * Or on Linux, use: $ lscpu
240  */
241 # if (defined(__AVX__) && defined(__AVX2__)) || defined(__AVX512F__)
242 /*
243  * Note that AVX/AVX2 requires the '-mavx2' compiler flag
244  * Whereas AVX-512 requires the '-mavx512f' compiler flag
245  */
246 # include <immintrin.h> /* for AVX, AVX2, and/or AVX-512 */
247 # endif
248 # if defined(__SSE__) && defined(__SSE2__) && defined(__SSE3__)
249 # include <xmmintrin.h> /* for SSE */
250 # include <emmintrin.h> /* for SSE2 */
251 # include <pmmintrin.h> /* for SSE3 */
252 # else
253 # error SAF_ENABLE_SIMD requires at least SSE, SSE2 and SSE3 support
254 # endif
255 #endif
256 
257 
258 /* ========================================================================== */
259 /* External Libraries Required by the Optional saf_sofa_reader Module */
260 /* ========================================================================== */
261 
262 #if defined(SAF_ENABLE_SOFA_READER_MODULE)
263 /*
264  * If your compiler stopped at this point, then please add the path for the
265  * netcdf include file to your project's include header paths.
266  * Instructions for linking the required "netcdf" library may also be found
267  * here: docs/SOFA_READER_MODULE_DEPENDENCIES.md
268  */
269 # include <netcdf.h>
270 #endif
271 
272 
273 /* ========================================================================== */
274 /* Configuration and Status Flags/Strings */
275 /* ========================================================================== */
276 
277 /* Currently employed performance library: */
278 #if defined(SAF_USE_INTEL_MKL_LP64)
279 # define SAF_CURRENT_PERFORMANCE_LIBRARY_STRING "Intel MKL (LP64)"
280 #elif defined(SAF_USE_INTEL_MKL_ILP64)
281 # define SAF_CURRENT_PERFORMANCE_LIBRARY_STRING "Intel MKL (ILP64)"
282 #elif defined(SAF_USE_OPEN_BLAS_AND_LAPACKE)
283 # define SAF_CURRENT_PERFORMANCE_LIBRARY_STRING "OpenBLAS with LAPACKE"
284 #elif defined(SAF_USE_ATLAS)
285 # define SAF_CURRENT_PERFORMANCE_LIBRARY_STRING "ATLAS"
286 #elif defined(__APPLE__) && defined(SAF_USE_APPLE_ACCELERATE)
287 # define SAF_CURRENT_PERFORMANCE_LIBRARY_STRING "Apple Accelerate"
288 #else
289 # define SAF_CURRENT_PERFORMANCE_LIBRARY_STRING "NONE"
290 #endif
291 
292 /* Status of Intel IPP */
293 #if defined(SAF_USE_INTEL_IPP)
294 # define SAF_INTEL_IPP_STATUS_STRING "Enabled"
295 #else
296 # define SAF_INTEL_IPP_STATUS_STRING "Disabled"
297 #endif
298 
299 /* Status of FFTW */
300 #if defined(SAF_USE_FFTW)
301 # define SAF_FFTW_STATUS_STRING "Enabled"
302 #else
303 # define SAF_FFTW_STATUS_STRING "Disabled"
304 #endif
305 
306 /* Status of SIMD intrinsics */
307 #if defined(SAF_ENABLE_SIMD)
308 # define SAF_SIMD_STATUS_STRING "Enabled"
309 /* Which SIMD intrinsics are currently enabled? */
310 # if defined(__AVX512F__)
311 # define SAF_ENABLED_SIMD_INTRINSICS_STRING "SSE, SSE2, SSE3, AVX, AVX2, AVX512F"
312 # elif defined(__AVX__) && defined(__AVX2__)
313 # define SAF_ENABLED_SIMD_INTRINSICS_STRING "SSE, SSE2, SSE3, AVX, AVX2"
314 # elif defined(__SSE__) && defined(__SSE2__) && defined(__SSE3__)
315 # define SAF_ENABLED_SIMD_INTRINSICS_STRING "SSE, SSE2, SSE3"
316 # else
317 # define SAF_ENABLED_SIMD_INTRINSICS_STRING "None"
318 # endif
319 #else
320 # define SAF_SIMD_STATUS_STRING "Disabled"
321 # define SAF_ENABLED_SIMD_INTRINSICS_STRING "None"
322 #endif
323 
324 /* Status of netCDF */
325 #if defined(SAF_ENABLE_SOFA_READER_MODULE)
326 # define SAF_NETCDF_STATUS_STRING "Enabled"
327 #else
328 # define SAF_NETCDF_STATUS_STRING "Disabled"
329 #endif
330 
332 #define SAF_EXTERNALS_CONFIGURATION_STRING \
333  "Current SAF externals configuration: " "\n" \
334  " - Performance library: " SAF_CURRENT_PERFORMANCE_LIBRARY_STRING "\n" \
335  " - Intel IPP status: " SAF_INTEL_IPP_STATUS_STRING "\n" \
336  " - FFTW status: " SAF_FFTW_STATUS_STRING "\n" \
337  " - SIMD status: " SAF_SIMD_STATUS_STRING "\n" \
338  " - Enabled intrinsics: " SAF_ENABLED_SIMD_INTRINSICS_STRING "\n" \
339  " - netCDF status: " SAF_NETCDF_STATUS_STRING "\n"
340 
341 
342 #endif /* __SAF_EXTERNALS_H_INCLUDED__ */