SAF
Loading...
Searching...
No Matches
saf_externals.h
Go to the documentation of this file.
1/*
2 * Copyright 2020 Leo McCormack
3 *
4 * Permission to use, copy, modify, and/or distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
9 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
10 * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
11 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
12 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
13 * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
14 * PERFORMANCE OF THIS SOFTWARE.
15 */
16
66#ifndef __SAF_EXTERNALS_H_INCLUDED__
67#define __SAF_EXTERNALS_H_INCLUDED__
68
69#ifdef __cplusplus
70extern "C" {
71#endif /* __cplusplus */
72
73/* ========================================================================== */
74/* Performance Library to Employ */
75/* ========================================================================== */
76
77/* Assert that exactly one CBLAS/LAPACK performance library has been specified.
 * Each defined() term evaluates to 1 or 0, so the sum counts the library flags
 * that are set; zero flags or more than one flag both trigger the error. */
78#if (defined(SAF_USE_INTEL_MKL_LP64) + \
79 defined(SAF_USE_INTEL_MKL_ILP64) + \
80 defined(SAF_USE_OPEN_BLAS_AND_LAPACKE) + \
81 defined(SAF_USE_ATLAS) + \
82 defined(SAF_USE_GSL) + \
83 defined(SAF_USE_APPLE_ACCELERATE)) != 1
84# error One (and only one) performance library flag should be defined!
85#endif
86
87/*
88 * Due to the nature of spatial/multi-channel audio signal processing, SAF is
89 * required to employ heavy use of linear algebra operations. Therefore, the
90 * framework has been written from the ground up to conform to the CBLAS and
91 * LAPACK standards, for which there are a number of highly optimised
92 * performance libraries available:
93 */
94#if defined(SAF_USE_INTEL_MKL_LP64)
95/*
96 * Using Intel's Math Kernel Library (MKL) LP64 configuration (32-bit int)
97 * (Generally the fastest library for x86 based architectures)
98 *
99 * Note that Intel MKL not only supports CBLAS and LAPACK, but also offers:
100 * - a highly optimised discrete/fast Fourier transform (DFT/FFT), which is
101 * used by the saf_utility_fft wrapper [unless this is overridden by the Intel
102 * IPP implementation (SAF_USE_INTEL_IPP), or FFTW (SAF_USE_FFTW)].
103 * - a number of additional vector, vector-vector, vector-scalar operations
104 * that are not covered by the CBLAS standard; such as: Hadamard products,
105 * element-wise additions/subtractions, and the modulus or reciprocal of
106 * all vector elements, etc.
107 */
108# include "mkl.h"
109
110#elif defined(SAF_USE_INTEL_MKL_ILP64)
111/*
112 * Using Intel's Math Kernel Library (MKL) ILP64 configuration (64-bit int)
113 * (Generally the fastest library for x86 based architectures)
114 *
115 * This 64-bit int version is the one employed by e.g. MATLAB. Therefore, it is
116 * required if you wish to build MEX objects using SAF code; see e.g.
117 * extras/safmex. In general, the performance of this option is practically the
118 * same as "SAF_USE_INTEL_MKL_LP64", but it is slower in some very rare special
119 * cases. Therefore, "SAF_USE_INTEL_MKL_LP64" is still the favoured option if
120 * you are not planning on building MEX objects using SAF.
121 *
 * MKL_ILP64 is defined immediately before mkl.h is included.
 */
122# define MKL_ILP64
123# include "mkl.h"
124
125#elif defined(SAF_USE_OPEN_BLAS_AND_LAPACKE)
126/*
127 * Using OpenBLAS and the LAPACKE interface
128 * (A decent option for both x86 and ARM based architectures)
129 *
130 * This option provides implementations of the CBLAS/LAPACK functions which have
131 * decent performance. However, unlike Intel MKL or Apple Accelerate, it does
132 * not offer an optimised DFT/FFT or any other linear algebra functions outside
133 * of these standards. Therefore, consider also using Intel's IPP library or
134 * FFTW for the DFT/FFT with: "SAF_USE_INTEL_IPP" or "SAF_USE_FFTW"
135 *
136 * Note that "SAF_USE_INTEL_IPP" also offers support for certain linear algebra
137 * operations not covered by the CBLAS/LAPACK standards, which SAF can leverage.
138 *
139 * Alternatively, SSE/AVX/AVX-512 fallback implementations for certain linear
140 * algebra operations may be enabled with: "SAF_ENABLE_SIMD"
141 *
142 * More information regarding these additional options can be found below.
143 */
144# include "cblas.h"
145# include "lapacke.h"
146
147#elif defined(SAF_USE_ATLAS)
148/*
149 * Using the Automatically Tuned Linear Algebra Software (ATLAS) library
150 * (Not recommended, since some saf_utility_veclib functions do not work with
151 * ATLAS)
152 *
153 * Basically, do not use this unless you have to, and if you do, be aware that
154 * some linear algebra functions in saf_utility_veclib will exit the program if
155 * they are called.
156 */
157# include "cblas-atlas.h"
158# include "clapack.h"
159# warning Note: CLAPACK does not include all LAPACK routines!
160
161#elif defined(__APPLE__) && defined(SAF_USE_APPLE_ACCELERATE)
162/*
163 * Using Apple's Accelerate library
164 * (Solid choice for both x86 and ARM, but only works under MacOSX and is not
165 * quite as fast as Intel MKL for x86 systems)
166 *
 * This branch also requires __APPLE__ to be defined; on any other target,
 * SAF_USE_APPLE_ACCELERATE on its own falls through to the #error in the
 * final #else branch.
 *
167 * Note that Apple Accelerate not only supports CBLAS and LAPACK, but also
168 * offers:
169 * - an optimised discrete/fast Fourier transform (DFT/FFT), which is used by
170 * the saf_utility_fft wrapper [unless this is overridden by the Intel IPP
171 * implementation (SAF_USE_INTEL_IPP), or FFTW (SAF_USE_FFTW)].
172 * - a number of additional vector, vector-vector, vector-scalar operations
173 * that are not covered by the CBLAS standard; such as Hadamard products,
174 * element-wise additions/subtractions, etc.
175 *
176 * Unlike e.g. Intel MKL's DFT, not all even number DFT lengths are supported by
177 * vDSP. Therefore, be aware that the default kissFFT library (included in
178 * framework/resources) is still used as a fall-back option in such cases.
179 */
180# include "Accelerate/Accelerate.h"
181
182#elif defined(SAF_USE_GSL)
183/*
184 * Using the GNU Scientific Library (GSL)
185 *
186 * Please feel free to try it out and report back. Note also that certain LAPACK
187 * functions used in saf_utility_veclib will need to be swapped out for
188 * equivalent functions in GSL
189 *
 * For now, selecting this option always raises the #error below.
 */
190# error Using GNU Scientific Library (GSL) is currently unsupported/incomplete
191# include "gsl_cblas.h"
192
193#else
194/*
195 * If you would like to use some other CBLAS/LAPACK supporting library then
196 * please get in touch! :-)
197 */
198# error SAF requires a library (or libraries) which supports CBLAS and LAPACK
199#endif
200
201
202/* ========================================================================== */
203/* Optional External Libraries */
204/* ========================================================================== */
205
206#if defined(SAF_USE_INTEL_IPP)
207/*
208 * The use of Intel's Integrated Performance Primitives (IPP) is optional, but
209 * does lead to improvements in the following:
210 * - slightly faster DFT/FFT (for saf_utility_fft) compared with the
211 * implementation found in Intel MKL, which are both faster than the DFT/FFT
212 * implementations found in Apple Accelerate vDSP and FFTW.
213 * - this overrides the included resources/speex_resampler with the IPP
214 * resampler, which is marginally faster and more accurate.
215 * - this also overrides certain vector-vector, and vector-scalar operations,
216 * such as element-wise multiplication, addition, scaling etc.
217 *
218 * Note that the IPP DFT/FFT is overridden by FFTW if SAF_USE_FFTW is defined.
219 */
220# include "ipp.h"
221#endif
222
/* Optional: include the FFTW header when SAF_USE_FFTW is defined */
223#if defined(SAF_USE_FFTW)
224/*
225 * The use of FFTW is optional, but it is faster than the default kissFFT
226 * DFT/FFT implementation. However, if you are on an x86 CPU then the DFT/FFT
227 * implementations found in Intel IPP, Intel MKL and Apple Accelerate are
228 * usually faster options.
229 *
230 * Note that SAF uses the single-precision version (fftw3f), which is built with:
231 * $ ./configure --enable-float
232 * $ make
233 *
234 * If SAF_USE_FFTW is defined, then FFTW overrides all of the other available
235 * DFT/FFT implementations in the saf_utility_fft wrapper.
236 */
237# include "fftw3.h"
238#endif
239
240#if defined(SAF_ENABLE_SIMD)
241/*
242 * SAF heavily favours the use of optimised linear algebra routines provided by
243 * e.g. Intel MKL or Accelerate, since they optimally employ vectorisation
244 * (with SSE/AVX etc.). However, in cases where the employed performance library
245 * does not offer an implementation for a particular routine, SAF provides fall-
246 * back option(s).
247 * SIMD accelerated fall-back options may be enabled with: SAF_ENABLE_SIMD
248 *
249 * By default SSE, SSE2, and SSE3 intrinsics are employed, unless one of the
250 * following compiler flags is given:
251 * - AVX/AVX2 intrinsics are enabled with: -mavx2
252 * - AVX-512 intrinsics are enabled with: -mavx512f
253 *
254 * Note that intrinsics require a CPU that supports them (x86_64 architecture).
255 * To find out which SIMD intrinsics are supported by your own CPU, use the
256 * following terminal command on macOS: $ sysctl -a | grep machdep.cpu.features
257 * Or on Linux, use: $ lscpu
258 */
259# if (defined(__AVX__) && defined(__AVX2__)) || defined(__AVX512F__)
260/*
261 * Note that AVX/AVX2 requires the '-mavx2' compiler flag
262 * Whereas AVX-512 requires the '-mavx512f' compiler flag
263 */
264# include <immintrin.h> /* for AVX, AVX2, and/or AVX-512 */
265# endif
/* SSE, SSE2 and SSE3 form the mandatory baseline: this check is made
 * independently of the AVX test above, and compilation fails if any of the
 * three is missing. */
266# if defined(__SSE__) && defined(__SSE2__) && defined(__SSE3__)
267# include <xmmintrin.h> /* for SSE */
268# include <emmintrin.h> /* for SSE2 */
269# include <pmmintrin.h> /* for SSE3 */
270# else
271# error SAF_ENABLE_SIMD requires at least SSE, SSE2 and SSE3 support
272# endif
273#endif
274
275#if defined(SAF_ENABLE_SOFA_READER_MODULE)
276/*
277 * The built-in saf_sofa_open() SOFA file reader has two implementations:
278 * - By default, the function wraps around the "libmysofa" library
279 * (BSD-3-Clause license), which depends on only zlib (which is included
280 * in framework/resources/zlib). The downsides of this option are that zlib
281 * has file size limits for each chunk (<4GB) and it is quite slow at
282 * decompressing large files.
283 * - If SAF_ENABLE_NETCDF is defined, then an alternative SOFA reader may be
284 * used. This version requires netcdf to be linked to SAF, along with its
285 * dependencies. The netcdf loader gets around the file size limits of
286 * the libmysofa loader and is also approximately 3 times faster.
287 * Therefore, if you intend to load many large SOFA files
288 * (especially microphone arrays or Ambisonic IRs), then this alternative
289 * SOFA reader is either required (to get around the file size limit) or
290 * may be preferred due to the shorter loading times. The downsides of
291 * using the netcdf option are that it is NOT thread-safe, and that it
292 * requires these additional external libraries to be linked to SAF.
293 *
294 * Note that the "mysofa" interface, e.g. mysofa_load(), may also be called
295 * directly, rather than using saf_sofa_open().
296 */
/* netcdf.h is only included when both SAF_ENABLE_SOFA_READER_MODULE and
 * SAF_ENABLE_NETCDF are defined */
297# ifdef SAF_ENABLE_NETCDF
298# include <netcdf.h>
299# endif
300#endif
301
302
303/* ========================================================================== */
304/* Configuration and Status Flags/Strings */
305/* ========================================================================== */
306
307/* Currently employed performance library, as a human-readable string.
 * The branches follow the same order as the library selection chain earlier in
 * this file, except that there is no entry for SAF_USE_GSL; any configuration
 * not matched here yields "NONE". */
308#if defined(SAF_USE_INTEL_MKL_LP64)
309# define SAF_CURRENT_PERFORMANCE_LIBRARY_STRING "Intel MKL (LP64)"
310#elif defined(SAF_USE_INTEL_MKL_ILP64)
311# define SAF_CURRENT_PERFORMANCE_LIBRARY_STRING "Intel MKL (ILP64)"
312#elif defined(SAF_USE_OPEN_BLAS_AND_LAPACKE)
313# define SAF_CURRENT_PERFORMANCE_LIBRARY_STRING "OpenBLAS with LAPACKE"
314#elif defined(SAF_USE_ATLAS)
315# define SAF_CURRENT_PERFORMANCE_LIBRARY_STRING "ATLAS"
316#elif defined(__APPLE__) && defined(SAF_USE_APPLE_ACCELERATE)
317# define SAF_CURRENT_PERFORMANCE_LIBRARY_STRING "Apple Accelerate"
318#else
319# define SAF_CURRENT_PERFORMANCE_LIBRARY_STRING "NONE"
320#endif
321
322/* Status of Intel IPP: "Enabled" if SAF_USE_INTEL_IPP is defined, otherwise
 * "Disabled" */
323#if defined(SAF_USE_INTEL_IPP)
324# define SAF_INTEL_IPP_STATUS_STRING "Enabled"
325#else
326# define SAF_INTEL_IPP_STATUS_STRING "Disabled"
327#endif
328
329/* Status of FFTW: "Enabled" if SAF_USE_FFTW is defined, otherwise
 * "Disabled" */
330#if defined(SAF_USE_FFTW)
331# define SAF_FFTW_STATUS_STRING "Enabled"
332#else
333# define SAF_FFTW_STATUS_STRING "Disabled"
334#endif
335
336/* Status of SIMD intrinsics: SAF_SIMD_STATUS_STRING reflects whether
 * SAF_ENABLE_SIMD is defined, and SAF_ENABLED_SIMD_INTRINSICS_STRING lists the
 * instruction sets detected from the compiler's predefined macros. */
337#if defined(SAF_ENABLE_SIMD)
338# define SAF_SIMD_STATUS_STRING "Enabled"
339/* Which SIMD intrinsics are currently enabled? Tested from AVX-512 down to
 * SSE; the first matching branch supplies the string. */
340# if defined(__AVX512F__)
341# define SAF_ENABLED_SIMD_INTRINSICS_STRING "SSE, SSE2, SSE3, AVX, AVX2, AVX512F"
342# elif defined(__AVX__) && defined(__AVX2__)
343# define SAF_ENABLED_SIMD_INTRINSICS_STRING "SSE, SSE2, SSE3, AVX, AVX2"
344# elif defined(__SSE__) && defined(__SSE2__) && defined(__SSE3__)
345# define SAF_ENABLED_SIMD_INTRINSICS_STRING "SSE, SSE2, SSE3"
346# else
347# define SAF_ENABLED_SIMD_INTRINSICS_STRING "None"
348# endif
349#else
350# define SAF_SIMD_STATUS_STRING "Disabled"
351# define SAF_ENABLED_SIMD_INTRINSICS_STRING "None"
352#endif
353
354/* Status of netCDF.
 * NOTE: this string depends only on SAF_ENABLE_NETCDF, whereas <netcdf.h> is
 * included earlier in this file only if SAF_ENABLE_SOFA_READER_MODULE is also
 * defined; it can therefore read "Enabled" without the header being included. */
355#if defined(SAF_ENABLE_NETCDF)
356# define SAF_NETCDF_STATUS_STRING "Enabled"
357#else
358# define SAF_NETCDF_STATUS_STRING "Disabled"
359#endif
360
/* Multi-line summary of the current externals configuration. It is assembled at
 * compile time by concatenating adjacent string literals with the status
 * strings defined in this file; every entry ends with a newline. */
362#define SAF_EXTERNALS_CONFIGURATION_STRING \
363 "Current SAF externals configuration: " "\n" \
364 " - Performance library: " SAF_CURRENT_PERFORMANCE_LIBRARY_STRING "\n" \
365 " - Intel IPP status: " SAF_INTEL_IPP_STATUS_STRING "\n" \
366 " - FFTW status: " SAF_FFTW_STATUS_STRING "\n" \
367 " - SIMD status: " SAF_SIMD_STATUS_STRING "\n" \
368 " - Enabled intrinsics: " SAF_ENABLED_SIMD_INTRINSICS_STRING "\n" \
369 " - netCDF status: " SAF_NETCDF_STATUS_STRING "\n"
370
371#ifdef __cplusplus
372} /* extern "C" */
373#endif /* __cplusplus */
374
375#endif /* __SAF_EXTERNALS_H_INCLUDED__ */