SAF
Loading...
Searching...
No Matches
saf_externals.h
Go to the documentation of this file.
1/*
2 * Copyright 2020 Leo McCormack
3 *
4 * Permission to use, copy, modify, and/or distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
9 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
10 * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
11 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
12 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
13 * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
14 * PERFORMANCE OF THIS SOFTWARE.
15 */
16
67
68#ifndef __SAF_EXTERNALS_H_INCLUDED__
69#define __SAF_EXTERNALS_H_INCLUDED__
70
71#ifdef __cplusplus
72extern "C" {
73#endif /* __cplusplus */
74
75/* ========================================================================== */
76/* Performance Library to Employ */
77/* ========================================================================== */
78
79/* Assert that exactly one CBLAS/LAPACK performance library flag is defined: each defined() term evaluates to 0 or 1, so the sum must equal 1 */
80#if (defined(SAF_USE_INTEL_MKL_LP64) + \
81 defined(SAF_USE_INTEL_MKL_ILP64) + \
82 defined(SAF_USE_OPEN_BLAS_AND_LAPACKE) + \
83 defined(SAF_USE_ATLAS) + \
84 defined(SAF_USE_GSL) + \
85 defined(SAF_USE_APPLE_ACCELERATE_LP64) + \
86 defined(SAF_USE_APPLE_ACCELERATE_ILP64)) != 1
87# error One (and only one) performance library flag should be defined!
88#endif
89
90/*
91 * Due to the nature of spatial/multi-channel audio signal processing, SAF is
92 * required to employ heavy use of linear algebra operations. Therefore, the
93 * framework has been written from the ground up to conform to the CBLAS and
94 * LAPACK standards, of which there are a number of highly optimised performance
95 * libraries available:
96 */
97#if defined(SAF_USE_INTEL_MKL_LP64)
98/*
99 * Using Intel's Math Kernel Library (MKL) LP64 configuration (32-bit int)
100 * (Generally the fastest library for x86 based architectures)
101 *
102 * Note that Intel MKL not only supports CBLAS and LAPACK, but also offers:
103 * - a highly optimised discrete/fast Fourier transform (DFT/FFT), which is
104 * used by the saf_utility_fft wrapper [unless this is overridden by the Intel
105 * IPP implementation (SAF_USE_INTEL_IPP), or FFTW (SAF_USE_FFTW)].
106 * - a number of additional vector, vector-vector, vector-scalar operations
107 * that are not covered by the CBLAS standard; such as: hadamard products,
108 * element-wise additions/subtractions, and the modulus or reciprocal of
109 * all vector elements, etc.
110 */
111# include "mkl.h"
112
113#elif defined(SAF_USE_INTEL_MKL_ILP64)
114/*
115 * Using Intel's Math Kernel Library (MKL) ILP64 configuration (64-bit int)
116 * (Generally the fastest library for x86 based architectures)
117 *
118 * This 64-bit int version is the one employed by e.g. MATLAB. Therefore, it is
119 * required if you wish to build MEX objects using SAF code; see e.g.
120 * extras/safmex. In general, the performance of this option is practically the
121 * same as "SAF_USE_INTEL_MKL_LP64", but it is slower in some very rare special
122 * cases. Therefore, "SAF_USE_INTEL_MKL_LP64" is still the favoured option if
123 * you are not planning on building MEX objects using SAF.
124 */
125# define MKL_ILP64
126# include "mkl.h"
127
128#elif defined(SAF_USE_OPEN_BLAS_AND_LAPACKE)
129/*
130 * Using OpenBLAS and the LAPACKE interface
131 * (A decent option for both x86 and ARM based architectures)
132 *
133 * This option provides implementations of the CBLAS/LAPACK functions which have
134 * decent performance. However, unlike Intel MKL or Apple Accelerate, it does
135 * not offer an optimised DFT/FFT or any other linear algebra functions outside
136 * of these standards. Therefore, consider also using Intel's IPP library or
137 * FFTW for the DFT/FFT with: "SAF_USE_INTEL_IPP" or "SAF_USE_FFTW"
138 *
139 * Note that "SAF_USE_INTEL_IPP" also offers support for certain linear algebra
140 * operations not covered by the CBLAS/LAPACK standards, which SAF can leverage.
141 *
142 * Alternatively, SSE/AVX/AVX-512 fallback implementations for certain linear
143 * algebra operations may be enabled with: "SAF_ENABLE_SIMD"
144 *
145 * More information regarding these additional options can be found below.
146 */
147# include "cblas.h"
148# include "lapacke.h"
149
150#elif defined(SAF_USE_ATLAS)
151/*
152 * Using the Automatically Tuned Linear Algebra Software (ATLAS) library
153 * (Not recommended, since some saf_utility_veclib functions do not work with
154 * ATLAS)
155 *
156 * Basically, do not use this unless you have to, and if you do, be aware that
157 * some linear algebra functions in saf_utility_veclib will exit the program if
158 * they are called.
159 */
160# include "cblas-atlas.h"
161# include "clapack.h"
162# warning Note: CLAPACK does not include all LAPACK routines!
163
164#elif defined(__APPLE__) && defined(SAF_USE_APPLE_ACCELERATE_LP64)
165/*
166 * Using Apple's Accelerate library (LP64 configuration)
167 * (Solid choice for both x86 and ARM, but only works under MacOSX and is not
168 * quite as fast as Intel MKL for x86 systems)
169 *
170 * Adding ACCELERATE_NEW_LAPACK (supported in MacOS 13.3 or newer) is optional.
171 *
172 * Note that Apple Accelerate not only supports CBLAS and LAPACK, but also
173 * offers:
174 * - an optimised discrete/fast Fourier transform (DFT/FFT), which is used by
175 * the saf_utility_fft wrapper [unless this is overridden by the Intel IPP
176 * implementation (SAF_USE_INTEL_IPP), or FFTW (SAF_USE_FFTW)].
177 * - a number of additional vector, vector-vector, vector-scalar operations
178 * that are not covered by the CBLAS standard; such as hadamard products,
179 * element-wise additions/subtractions, etc.
180 *
181 * Unlike e.g. Intel MKL's DFT, not all even number DFT lengths are supported by
182 * vDSP. Therefore, be aware that the default kissFFT library (included in
183 * framework/resources) is still used as a fall-back option in such cases.
184 */
185# include "Accelerate/Accelerate.h"
186
187#elif defined(__APPLE__) && defined(SAF_USE_APPLE_ACCELERATE_ILP64)
188/*
189 * Using Apple's Accelerate library (ILP64 configuration)
190 * (Solid choice for both x86 and ARM, but only works under MacOSX and is not
191 * quite as fast as Intel MKL for x86 systems)
192 *
193 * Adding ACCELERATE_NEW_LAPACK (supported in MacOS 13.3 or newer) is required.
194 * Adding ACCELERATE_LAPACK_ILP64 (supported in MacOS 13.3 or newer) is required.
195 */
196# if !defined(ACCELERATE_NEW_LAPACK)
197# error ACCELERATE_NEW_LAPACK should also be added to preprocessor definitions
198# endif
199# if !defined(ACCELERATE_LAPACK_ILP64)
200# error ACCELERATE_LAPACK_ILP64 should also be added to preprocessor definitions
201# endif
202# include "Accelerate/Accelerate.h"
203
204#elif defined(SAF_USE_GSL)
205/*
206 * Using the GNU Scientific Library (GSL)
207 *
208 * Please feel free to try it out and report back. Note also that certain LAPACK
209 * functions used in saf_utility_veclib will need to be swapped out for
210 * equivalent functions in GSL
211 */
212# error Using GNU Scientific Library (GSL) is currently unsupported/incomplete
213# include "gsl_cblas.h"
214
215#else
216/*
217 * If you would like to use some other CBLAS/LAPACK supporting library then
218 * please get in touch! :-)
219 */
220# error SAF requires a library (or libraries) which supports CBLAS and LAPACK
221#endif
222
223
224/* ========================================================================== */
225/* Optional External Libraries */
226/* ========================================================================== */
227
228#if defined(SAF_USE_INTEL_IPP)
229/*
230 * The use of Intel's Integrated Performance Primitives (IPP) is optional, but
231 * does lead to improvements in the following:
232 * - slightly faster DFT/FFT (for saf_utility_fft) compared with the
233 * implementation found in Intel MKL, which are both faster than the DFT/FFT
234 * implementations found in Apple Accelerate vDSP and FFTW.
235 * - this overrides the included resources/speex_resampler with the IPP
236 * resampler, which is marginally faster and more accurate.
237 * - this also overrides certain vector-vector, and vector-scalar operations,
238 * such as element-wise multiplication, addition, scaling etc.
239 *
240 * Note that the IPP DFT/FFT is overridden by FFTW if SAF_USE_FFTW is defined.
241 */
242# include "ipp.h"
243#endif
244
245#if defined(SAF_USE_FFTW)
246/*
247 * The use of FFTW (enabled with SAF_USE_FFTW) is optional, but it is faster
248 * than the default kissFFT DFT/FFT implementation. However, if you are on an
249 * x86 CPU then the DFT/FFT implementations found in Intel IPP, Intel MKL and
250 * Apple Accelerate are usually faster options.
251 *
252 * Note, SAF uses the single-precision version (fftw3f), which is built with:
253 * $ ./configure --enable-float
254 * $ make
255 *
256 * If SAF_USE_FFTW is defined, then FFTW overrides all of the other available
257 * DFT/FFT implementations in the saf_utility_fft wrapper.
258 */
259# include "fftw3.h"
260#endif
261
262#if defined(SAF_ENABLE_SIMD)
263/*
264 * SAF heavily favours the use of optimised linear algebra routines provided by
265 * e.g. Intel MKL or Accelerate, since they optimally employ vectorisation
266 * (with SSE/AVX etc.). However, in cases where the employed performance library
267 * does not offer an implementation for a particular routine, SAF provides fall-
268 * back option(s).
269 * SIMD accelerated fall-back options may be enabled with: SAF_ENABLE_SIMD
270 *
271 * By default SSE, SSE2, and SSE3 intrinsics are employed, unless one of the
272 * following compiler flags is given:
273 * - AVX/AVX2 intrinsics are enabled with: -mavx2
274 * - AVX-512 intrinsics are enabled with: -mavx512f
275 *
276 * Note that intrinsics require a CPU that supports them (x86_64 architecture)
277 * To find out which SIMD intrinsics are supported by your own CPU, use the
278 * following terminal command on macOS: $ sysctl -a | grep machdep.cpu.features
279 * Or on Linux, use: $ lscpu
280 */
281# if (defined(__AVX__) && defined(__AVX2__)) || defined(__AVX512F__)
282/*
283 * Note that AVX/AVX2 requires the '-mavx2' compiler flag
284 * Whereas AVX-512 requires the '-mavx512f' compiler flag
285 */
286# include <immintrin.h> /* for AVX, AVX2, and/or AVX-512 */
287# endif
288# if defined(__SSE__) && defined(__SSE2__) && defined(__SSE3__)
289# include <xmmintrin.h> /* for SSE */
290# include <emmintrin.h> /* for SSE2 */
291# include <pmmintrin.h> /* for SSE3 */
292# else
293# error SAF_ENABLE_SIMD requires at least SSE, SSE2 and SSE3 support /* raised when any of __SSE__, __SSE2__ or __SSE3__ is not defined */
294# endif
295#endif
296
297#if defined(SAF_ENABLE_SOFA_READER_MODULE)
298/*
299 * The built-in saf_sofa_open() SOFA file reader has two implementations:
300 * - By default, the function wraps around the "libmysofa" library
301 * (BSD-3-Clause license), which depends on only zlib (which is included
302 * in framework/resources/zlib). The downsides of this option are that zlib
303 * has file size limits for each chunk (<4GB) and it is quite slow at
304 * decompressing large files.
305 * - If SAF_ENABLE_NETCDF is defined, then an alternative SOFA reader may be
306 * used. This version requires netcdf to be linked to SAF, along with its
307 * dependencies. The netcdf loader gets around the file size limits of
308 * the libmysofa loader and is also approximately 3 times faster.
309 * Therefore, if you intend to load many large SOFA files
310 * (especially microphone arrays or Ambisonic IRs), then this alternative
311 * SOFA reader is either required (to get around the file size limit) or
312 * may be preferred due to the shorter loading times. The downsides of
313 * using the netcdf option are that it is NOT thread-safe, and that it
314 * requires these additional external libraries to be linked to SAF.
315 *
316 * Note that the "mysofa" interface, e.g. mysofa_load(), may also be called
317 * directly, rather than using saf_sofa_open().
318 */
319# ifdef SAF_ENABLE_NETCDF
320# include <netcdf.h>
321# endif
322#endif
323
324
325/* ========================================================================== */
326/* Configuration and Status Flags/Strings */
327/* ========================================================================== */
328
329/* Currently employed performance library (same flag tests as the header selection chain earlier in this file; SAF_USE_GSL has no entry here, and "NONE" is used when no branch matches): */
330#if defined(SAF_USE_INTEL_MKL_LP64)
331# define SAF_CURRENT_PERFORMANCE_LIBRARY_STRING "Intel MKL (LP64)"
332#elif defined(SAF_USE_INTEL_MKL_ILP64)
333# define SAF_CURRENT_PERFORMANCE_LIBRARY_STRING "Intel MKL (ILP64)"
334#elif defined(SAF_USE_OPEN_BLAS_AND_LAPACKE)
335# define SAF_CURRENT_PERFORMANCE_LIBRARY_STRING "OpenBLAS with LAPACKE"
336#elif defined(SAF_USE_ATLAS)
337# define SAF_CURRENT_PERFORMANCE_LIBRARY_STRING "ATLAS"
338#elif defined(__APPLE__) && defined(SAF_USE_APPLE_ACCELERATE_LP64)
339# define SAF_CURRENT_PERFORMANCE_LIBRARY_STRING "Apple Accelerate (LP64)"
340#elif defined(__APPLE__) && defined(SAF_USE_APPLE_ACCELERATE_ILP64)
341# define SAF_CURRENT_PERFORMANCE_LIBRARY_STRING "Apple Accelerate (ILP64)"
342#else
343# define SAF_CURRENT_PERFORMANCE_LIBRARY_STRING "NONE"
344#endif
345
346/* Status of Intel IPP: "Enabled" only when SAF_USE_INTEL_IPP is defined */
347#if defined(SAF_USE_INTEL_IPP)
348# define SAF_INTEL_IPP_STATUS_STRING "Enabled"
349#else
350# define SAF_INTEL_IPP_STATUS_STRING "Disabled"
351#endif
352
353/* Status of FFTW: "Enabled" only when SAF_USE_FFTW is defined */
354#if defined(SAF_USE_FFTW)
355# define SAF_FFTW_STATUS_STRING "Enabled"
356#else
357# define SAF_FFTW_STATUS_STRING "Disabled"
358#endif
359
360/* Status of SIMD intrinsics: "Enabled" only when SAF_ENABLE_SIMD is defined */
361#if defined(SAF_ENABLE_SIMD)
362# define SAF_SIMD_STATUS_STRING "Enabled"
363/* Which SIMD intrinsics are currently enabled? Branches are tested from AVX-512 down to SSE, and the first match sets the string */
364# if defined(__AVX512F__)
365# define SAF_ENABLED_SIMD_INTRINSICS_STRING "SSE, SSE2, SSE3, AVX, AVX2, AVX512F"
366# elif defined(__AVX__) && defined(__AVX2__)
367# define SAF_ENABLED_SIMD_INTRINSICS_STRING "SSE, SSE2, SSE3, AVX, AVX2"
368# elif defined(__SSE__) && defined(__SSE2__) && defined(__SSE3__)
369# define SAF_ENABLED_SIMD_INTRINSICS_STRING "SSE, SSE2, SSE3"
370# else
371# define SAF_ENABLED_SIMD_INTRINSICS_STRING "None"
372# endif
373#else
374# define SAF_SIMD_STATUS_STRING "Disabled"
375# define SAF_ENABLED_SIMD_INTRINSICS_STRING "None"
376#endif
377
378/* Status of netCDF: tests SAF_ENABLE_NETCDF only. NOTE(review): netcdf.h is included only when SAF_ENABLE_SOFA_READER_MODULE is also defined, so "Enabled" may be reported without that include - confirm this is intended */
379#if defined(SAF_ENABLE_NETCDF)
380# define SAF_NETCDF_STATUS_STRING "Enabled"
381#else
382# define SAF_NETCDF_STATUS_STRING "Disabled"
383#endif
384
/* Multi-line, human-readable summary of the externals configuration, formed by concatenating adjacent string literals with the status strings defined earlier in this file; each entry ends with a newline */
386#define SAF_EXTERNALS_CONFIGURATION_STRING \
387 "Current SAF externals configuration: " "\n" \
388 " - Performance library: " SAF_CURRENT_PERFORMANCE_LIBRARY_STRING "\n" \
389 " - Intel IPP status: " SAF_INTEL_IPP_STATUS_STRING "\n" \
390 " - FFTW status: " SAF_FFTW_STATUS_STRING "\n" \
391 " - SIMD status: " SAF_SIMD_STATUS_STRING "\n" \
392 " - Enabled intrinsics: " SAF_ENABLED_SIMD_INTRINSICS_STRING "\n" \
393 " - netCDF status: " SAF_NETCDF_STATUS_STRING "\n"
394
395#ifdef __cplusplus
396} /* extern "C" */
397#endif /* __cplusplus */
398
399#endif /* __SAF_EXTERNALS_H_INCLUDED__ */