52#if defined(SAF_USE_INTEL_MKL_LP64) 
   54# define SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE  
   55#elif defined(SAF_USE_INTEL_MKL_ILP64) 
   57# define SAF_VECLIB_USE_LAPACKE_INTERFACE         
   58#elif defined(SAF_USE_OPEN_BLAS_AND_LAPACKE) 
   59# define SAF_VECLIB_USE_LAPACKE_INTERFACE         
   60#elif defined(SAF_USE_ATLAS) 
   61# define SAF_VECLIB_USE_CLAPACK_INTERFACE         
   62#elif defined(__APPLE__) && defined(SAF_USE_APPLE_ACCELERATE) 
   63# define SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE  
   64#elif defined(SAF_USE_GSL) 
   65# define SAF_VECLIB_USE_GSL_LINALG  
   67# error No LAPACK interface was specified! 
   71#if defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
   72# ifndef SAF_INTEL_MKL_VML_MODE 
   92#  define SAF_INTEL_MKL_VML_MODE ( VML_LA | VML_FTZDAZ_ON ) 
   97#if defined(__APPLE__) && defined(SAF_USE_APPLE_ACCELERATE) 
  103#elif defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
  104# ifdef SAF_USE_INTEL_MKL_LP64 
  113#elif defined(SAF_USE_OPEN_BLAS_AND_LAPACKE) 
  132#ifdef SAF_USE_BUILT_IN_NAIVE_CBLAS 
  133void cblas_scopy(
const int N, 
const float *X, 
const int incX, 
float *Y, 
const int incY){
 
  135#if defined(SAF_ENABLE_SIMD) 
  136    if(incX==1 && incY==1){
 
  137        for(i=0; i<(N-3); i+=4)
 
  138            _mm_storeu_ps(Y+i, _mm_loadu_ps(X+i));
 
  144            Y[i*incY] = X[i*incX];
 
  147        Y[i*incY] = X[i*incX];
 
  151void cblas_dcopy(
const int N, 
const double *X, 
const int incX, 
double *Y, 
const int incY){
 
  153    for(i=j=0; i<N; i+=incX, j+=incY)
 
  157void cblas_ccopy(
const int N, 
const void *X, 
const int incX, 
void *Y, 
const int incY){
 
  159    float_complex *cX, *cY;
 
  160    cX = (float_complex*)X;
 
  161    cY = (float_complex*)Y;
 
  162    for(i=j=0; i<N; i+=incX, j+=incY)
 
  166void cblas_zcopy(
const int N, 
const void *X, 
const int incX, 
void *Y, 
const int incY){
 
  168    double_complex *cX, *cY;
 
  169    cX = (double_complex*)X;
 
  170    cY = (double_complex*)Y;
 
  171    for(i=j=0; i<N; i+=incX, j+=incY)
 
  175void cblas_saxpy(
const int N, 
const float alpha, 
const float* X, 
const int incX, 
float* Y, 
const int incY) {
 
  177    for (i=j=0; i<N; i+=incX, j+=incY)
 
  178        Y[i] = alpha * X[i] + Y[i];
 
  181void cblas_daxpy(
const int N, 
const double alpha, 
const double* X, 
const int incX, 
double* Y, 
const int incY) {
 
  183    for (i=j=0; i<N; i+=incX, j+=incY)
 
  184        Y[i] = alpha * X[i] + Y[i];
 
  187float cblas_sdot(
const int N, 
const float* X, 
const int incX, 
const float* Y, 
const int incY){
 
  190#if defined(SAF_ENABLE_SIMD) 
  192    if(incX==1 && incY==1){
 
  194        __m128 sum = _mm_setzero_ps();
 
  196        for(i=0; i<(N-3); i+=4)
 
  197           sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(X+i), _mm_loadu_ps(Y+i)));
 
  199        sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
 
  201        sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(3, 2, 0, 1)));
 
  202        _mm_store_ss(&ret, sum); 
 
  209            ret += X[i*incX] * Y[i*incY];
 
  213        ret += X[i*incX] * Y[i*incY];
 
  225#ifdef SAF_USE_BUILT_IN_NAIVE_CBLAS 
  226void cblas_sgemm(
const CBLAS_LAYOUT Layout, 
const CBLAS_TRANSPOSE TransA,
 
  227                     const CBLAS_TRANSPOSE TransB, 
const int M, 
const int N,
 
  228                     const int K, 
const float alpha, 
const float *A,
 
  229                     const int lda, 
const float *B, 
const int ldb,
 
  230                     const float beta, 
float *C, 
const int ldc)
 
  248    if(!_transA && !_transB){
 
  253                    C[i*ldc+j] += A[i*lda+k] * B[k*ldb+j];
 
  257    else if(_transA && !_transB){
 
  259    else if(_transA && _transB){
 
  276#if defined(SAF_USE_APPLE_ACCELERATE) 
  279    vDSP_minmgvi(a, 1, &minVal, &ind_tmp, (vDSP_Length)len);
 
  280    *index = (int)ind_tmp; 
 
  281#elif defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
  282    *index = (int)cblas_isamin(len, a, 1);
 
  286    float minVal=FLT_MAX;
 
  287    for(i=0; i<len; i++){
 
  288        if(fabsf(a[i])<minVal){
 
  289            minVal = fabsf(a[i]);
 
 
  298    const float_complex* a,
 
  303#if defined(SAF_USE_APPLE_ACCELERATE)  
  306    abs_a = 
malloc1d(len*
sizeof(
float));
 
  307    vDSP_vdist((
float*)a, 2, (
float*)a+1, 2, abs_a, 1, (vDSP_Length)len); 
 
  309    vDSP_minmgvi(abs_a, 1, &minVal, &ind_tmp, (vDSP_Length)len);
 
  310    *index = (int)ind_tmp;
 
  312#elif defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
  313    *index = (int)cblas_icamin(len, a, 1);
 
  317    float minVal=FLT_MAX;
 
  318    for(i=0; i<len; i++){
 
  319        if(cabsf(a[i])<minVal){
 
  320            minVal = cabsf(a[i]);
 
 
  334#if defined(SAF_USE_APPLE_ACCELERATE) 
  337    vDSP_minmgviD(a, 1, &minVal, &ind_tmp, (vDSP_Length)len);
 
  338    *index = (int)ind_tmp;
 
  339#elif defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
  340    *index = (int)cblas_idamin(len, a, 1);
 
  344    double minVal=DBL_MAX;
 
  345    for(i=0; i<len; i++){
 
  346        if(fabs(a[i])<minVal){
 
 
  356    const double_complex* a,
 
  361#if defined(SAF_USE_APPLE_ACCELERATE)  
  364    abs_a = 
malloc1d(len*
sizeof(
double));
 
  365    vDSP_vdistD((
double*)a, 2, (
double*)a+1, 2, abs_a, 1, (vDSP_Length)len); 
 
  367    vDSP_minmgviD(abs_a, 1, &minVal, &ind_tmp, (vDSP_Length)len);
 
  368    *index = (int)ind_tmp;
 
  370#elif defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
  371    *index = (int)cblas_izamin(len, a, 1);
 
  375    double minVal=DBL_MAX;
 
  376    for(i=0; i<len; i++){
 
  377        if(cabs(a[i])<minVal){
 
 
  397    *index = (int)cblas_isamax(len, a, 1);
 
 
  402    const float_complex* a,
 
  407    *index = (int)cblas_icamax(len, a, 1);
 
 
  417    *index = (int)cblas_idamax(len, a, 1);
 
 
  422    const double_complex* a,
 
  427    *index = (int)cblas_izamax(len, a, 1);
 
 
  442#if defined(SAF_USE_INTEL_IPP) 
  443    ippsAbs_32f((Ipp32f*)a, (Ipp32f*)c, len);
 
  444#elif defined(SAF_USE_APPLE_ACCELERATE) 
  445    vDSP_vabs(a, 1, c, 1, (vDSP_Length)len);
 
  446#elif defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
 
  457    const float_complex* a,
 
  462#if defined(SAF_USE_INTEL_IPP) 
  463    ippsMagnitude_32fc((Ipp32fc*)a, (Ipp32f*)c, len);
 
  464#elif defined(SAF_USE_APPLE_ACCELERATE) 
  465    vDSP_vdist((
float*)a, 2, (
float*)a+1, 2, c, 1, (vDSP_Length)len);
 
  466#elif defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
 
  488#if defined(SAF_USE_APPLE_ACCELERATE) 
  489    vvfmodf(c, a, b, &len);
 
  490#elif defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
  495        c[i] = fmodf(a[i], b[i]);
 
 
  511#if defined(SAF_USE_INTEL_IPP) 
  512    ippsDivCRev_32f((Ipp32f*)a, 1.0f, (Ipp32f*)c, len);
 
  513#elif defined(SAF_USE_APPLE_ACCELERATE) 
  516    vDSP_svdiv(&one, a, 1, c, 1, (vDSP_Length)len);
 
  517#elif defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
  519#elif defined(SAF_ENABLE_SIMD) 
  522# if defined(__AVX512F__) 
  523    for(; i<(len-15); i+=16)
 
  524        _mm512_storeu_ps(c+i, _mm512_rcp14_ps(_mm512_loadu_ps(a+i)));
 
  526# if defined(__AVX__) && defined(__AVX2__) 
  527    for(; i<(len-7); i+=8)
 
  528        _mm256_storeu_ps(c+i, _mm256_rcp_ps(_mm256_loadu_ps(a+i)));
 
  530# if defined(__SSE__) && defined(__SSE2__) && defined(__SSE3__) 
  531    for(; i<(len-3); i+=4)
 
  532        _mm_storeu_ps(c+i, _mm_rcp_ps(_mm_loadu_ps(a+i)));
 
 
  550    const float_complex* a,
 
  555#if defined(SAF_USE_INTEL_IPP) 
  556    ippsConj_32fc((Ipp32fc*)a, (Ipp32fc*)c, len);
 
  558    cblas_ccopy(len, a, 1, c, 1);
 
  559    cblas_sscal(len, -1.0f, ((
float*)c)+1, 2);
 
 
  565    const double_complex* a,
 
  570#if defined(SAF_USE_INTEL_IPP) 
  571    ippsConj_64fc((Ipp64fc*)a, (Ipp64fc*)c, len);
 
  573    cblas_zcopy(len, a, 1, c, 1);
 
  574    cblas_dscal(len, -1.0, ((
double*)c)+1, 2);
 
 
  590    cblas_scopy(len, a, 1, c, 1);
 
 
  595    const float_complex* a,
 
  600    cblas_ccopy(len, a, 1, c, 1);
 
 
  610    cblas_dcopy(len, a, 1, c, 1);
 
 
  615    const double_complex* a,
 
  620    cblas_zcopy(len, a, 1, c, 1);
 
 
  638    saf_assert(c!=NULL, 
"'c' can no longer be NULL");
 
  639    saf_assert(a!=c && b!=c, 
"In-place operation is no longer supported, use e.g. cblas_saxby() instead.");
 
  643#if defined(SAF_USE_INTEL_IPP) 
  644    ippsAdd_32f((Ipp32f*)a, (Ipp32f*)b, (Ipp32f*)c, len);
 
  645#elif defined(SAF_USE_APPLE_ACCELERATE) 
  646    vDSP_vadd(a, 1, b, 1, c, 1, (vDSP_Length)len);
 
  647#elif defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
  649#elif defined(SAF_ENABLE_SIMD) 
  652# if defined(__AVX512F__) 
  653    for(; i<(len-15); i+=16)
 
  654        _mm512_storeu_ps(c+i, _mm512_add_ps(_mm512_loadu_ps(a+i), _mm512_loadu_ps(b+i)));
 
  656# if defined(__AVX__) && defined(__AVX2__) 
  657    for(; i<(len-7); i+=8)
 
  658        _mm256_storeu_ps(c+i, _mm256_add_ps(_mm256_loadu_ps(a+i), _mm256_loadu_ps(b+i)));
 
  660# if defined(__SSE__) && defined(__SSE2__) && defined(__SSE3__) 
  661    for(; i<(len-3); i+=4)
 
  662        _mm_storeu_ps(c+i, _mm_add_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i)));
 
  669    for(i=0; i<len-3; i+=4){
 
  671        c[i+1] = a[i+1] + b[i+1];
 
  672        c[i+2] = a[i+2] + b[i+2];
 
  673        c[i+3] = a[i+3] + b[i+3];
 
  679    for (j = 0; j < len; j++)
 
 
  686    const float_complex* a,
 
  687    const float_complex* b,
 
  694    saf_assert(c!=NULL, 
"'c' can no longer be NULL");
 
  695    saf_assert(a!=c && b!=c, 
"In-place operation is no longer supported, use e.g. cblas_caxby() instead.");
 
  699#if defined(SAF_USE_INTEL_IPP) 
  700    ippsAdd_32fc((Ipp32fc*)a, (Ipp32fc*)b, (Ipp32fc*)c, len);
 
  701#elif defined(SAF_USE_APPLE_ACCELERATE) 
  702    vDSP_vadd((
float*)a, 1, (
float*)b, 1, (
float*)c, 1, 2*(vDSP_Length)len);
 
  703#elif defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
  705#elif defined(SAF_ENABLE_SIMD) 
  709    sa = (
float*)a; sb = (
float*)b; sc = (
float*)c;
 
  711# if defined(__AVX512F__) 
  712    for(; i<(len2-15); i+=16)
 
  713        _mm512_storeu_ps(sc+i, _mm512_add_ps(_mm512_loadu_ps(sa+i), _mm512_loadu_ps(sb+i)));
 
  715# if defined(__AVX__) && defined(__AVX2__) 
  716    for(; i<(len2-7); i+=8)
 
  717        _mm256_storeu_ps(sc+i, _mm256_add_ps(_mm256_loadu_ps(sa+i), _mm256_loadu_ps(sb+i)));
 
  719# if defined(__SSE__) && defined(__SSE2__) && defined(__SSE3__) 
  720    for(; i<(len2-3); i+=4)
 
  721        _mm_storeu_ps(sc+i, _mm_add_ps(_mm_loadu_ps(sa+i), _mm_loadu_ps(sb+i)));
 
  724        sc[i] = sa[i] + sb[i];
 
  725#elif __STDC_VERSION__ >= 199901L && defined(NDEBUG) 
  728    for(i=0; i<len-3; i+=4){
 
  730        c[i+1] = a[i+1] + b[i+1];
 
  731        c[i+2] = a[i+2] + b[i+2];
 
  732        c[i+3] = a[i+3] + b[i+3];
 
  738    for (j = 0; j < len; j++)
 
  739        c[j] = ccaddf(a[j], b[j]);
 
 
  753    saf_assert(c!=NULL, 
"'c' can no longer be NULL");
 
  754    saf_assert(a!=c && b!=c, 
"In-place operation is no longer supported, use e.g. cblas_daxby() instead.");
 
  758#if defined(SAF_USE_INTEL_IPP) 
  759    ippsAdd_64f((Ipp64f*)a, (Ipp64f*)b, (Ipp64f*)c, len);
 
  760#elif defined(SAF_USE_APPLE_ACCELERATE) 
  761    vDSP_vaddD(a, 1, b, 1, c, 1, (vDSP_Length)len);
 
  762#elif defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
  764#elif defined(SAF_ENABLE_SIMD) 
  767# if defined(__AVX512F__) 
  768    for(; i<(len-7); i+=8)
 
  769        _mm512_storeu_pd(c+i, _mm512_add_pd(_mm512_loadu_pd(a+i), _mm512_loadu_pd(b+i)));
 
  771# if defined(__AVX__) && defined(__AVX2__) 
  772    for(; i<(len-3); i+=4)
 
  773        _mm256_storeu_pd(c+i, _mm256_add_pd(_mm256_loadu_pd(a+i), _mm256_loadu_pd(b+i)));
 
  775# if defined(__SSE__) && defined(__SSE2__) && defined(__SSE3__) 
  776    for(; i<(len-1); i+=2)
 
  777        _mm_storeu_pd(c+i, _mm_add_pd(_mm_loadu_pd(a+i), _mm_loadu_pd(b+i)));
 
  783    for (j = 0; j < len; j++)
 
 
  790    const double_complex* a,
 
  791    const double_complex* b,
 
  798    saf_assert(c!=NULL, 
"'c' can no longer be NULL");
 
  799    saf_assert(a!=c && b!=c, 
"In-place operation is no longer supported, use e.g. cblas_zaxby() instead.");
 
  803#if defined(SAF_USE_INTEL_IPP) 
  804    ippsAdd_64fc((Ipp64fc*)a, (Ipp64fc*)b, (Ipp64fc*)c, len);
 
  805#elif defined(SAF_USE_APPLE_ACCELERATE) 
  806    vDSP_vaddD((
double*)a, 1, (
double*)b, 1, (
double*)c, 1, 2*(vDSP_Length)len);
 
  807#elif defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
  809#elif defined(SAF_ENABLE_SIMD) 
  811    double* sa, *sb, *sc;
 
  813    sa = (
double*)a; sb = (
double*)b; sc = (
double*)c;
 
  815# if defined(__AVX512F__) 
  816    for(; i<(len2-7); i+=8)
 
  817        _mm512_storeu_pd(sc+i, _mm512_add_pd(_mm512_loadu_pd(sa+i), _mm512_loadu_pd(sb+i)));
 
  819# if defined(__AVX__) && defined(__AVX2__) 
  820    for(; i<(len2-3); i+=4)
 
  821        _mm256_storeu_pd(sc+i, _mm256_add_pd(_mm256_loadu_pd(sa+i), _mm256_loadu_pd(sb+i)));
 
  823# if defined(__SSE__) && defined(__SSE2__) && defined(__SSE3__) 
  824    for(; i<(len2-1); i+=2)
 
  825        _mm_storeu_pd(sc+i, _mm_add_pd(_mm_loadu_pd(sa+i), _mm_loadu_pd(sb+i)));
 
  828        sc[i] = sa[i] + sb[i];
 
  831    for (j = 0; j < len; j++)
 
  832        c[j] = ccadd(a[j], b[j]);
 
 
  852    saf_assert(a!=c && b!=c, 
"In-place operation is not supported.");
 
  856#if defined(SAF_USE_INTEL_IPP) 
  857    ippsSub_32f((Ipp32f*)b, (Ipp32f*)a, (Ipp32f*)c, len); 
 
  858#elif defined(SAF_USE_APPLE_ACCELERATE) 
  859    vDSP_vsub(b, 1, a, 1, c, 1, (vDSP_Length)len);        
 
  860#elif defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
  862#elif defined(SAF_ENABLE_SIMD) 
  865# if defined(__AVX512F__) 
  866    for(; i<(len-15); i+=16)
 
  867        _mm512_storeu_ps(c+i, _mm512_sub_ps(_mm512_loadu_ps(a+i), _mm512_loadu_ps(b+i)));
 
  869# if defined(__AVX__) && defined(__AVX2__) 
  870    for(; i<(len-7); i+=8)
 
  871        _mm256_storeu_ps(c+i, _mm256_sub_ps(_mm256_loadu_ps(a+i), _mm256_loadu_ps(b+i)));
 
  873# if defined(__SSE__) && defined(__SSE2__) && defined(__SSE3__) 
  874    for(; i<(len-3); i+=4)
 
  875        _mm_storeu_ps(c+i, _mm_sub_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i)));
 
  882    for(i=0; i<len-3; i+=4){
 
  884        c[i+1] = a[i+1] - b[i+1];
 
  885        c[i+2] = a[i+2] - b[i+2];
 
  886        c[i+3] = a[i+3] - b[i+3];
 
  892    for (j = 0; j < len; j++)
 
 
  899    const float_complex* a,
 
  900    const float_complex* b,
 
  908    saf_assert(a!=c && b!=c, 
"In-place operation is not supported.");
 
  912#if defined(SAF_USE_INTEL_IPP) 
  913    ippsSub_32fc((Ipp32fc*)b, (Ipp32fc*)a, (Ipp32fc*)c, len);                         
 
  914#elif defined(SAF_USE_APPLE_ACCELERATE) 
  915    vDSP_vsub((
float*)b, 1, (
float*)a, 1, (
float*)c, 1, 2*(vDSP_Length)len); 
 
  916#elif defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
  918#elif defined(SAF_ENABLE_SIMD) 
  922    sa = (
float*)a; sb = (
float*)b; sc = (
float*)c;
 
  924# if defined(__AVX512F__) 
  925    for(; i<(len2-15); i+=16)
 
  926        _mm512_storeu_ps(sc+i, _mm512_sub_ps(_mm512_loadu_ps(sa+i), _mm512_loadu_ps(sb+i)));
 
  928# if defined(__AVX__) && defined(__AVX2__) 
  929    for(; i<(len2-7); i+=8)
 
  930        _mm256_storeu_ps(sc+i, _mm256_sub_ps(_mm256_loadu_ps(sa+i), _mm256_loadu_ps(sb+i)));
 
  932# if defined(__SSE__) && defined(__SSE2__) && defined(__SSE3__) 
  933    for(; i<(len2-3); i+=4)
 
  934        _mm_storeu_ps(sc+i, _mm_sub_ps(_mm_loadu_ps(sa+i), _mm_loadu_ps(sb+i)));
 
  937        sc[i] = sa[i] - sb[i];
 
  938#elif __STDC_VERSION__ >= 199901L && defined(NDEBUG) 
  941    for(i=0; i<len-3; i+=4){
 
  943        c[i+1] = a[i+1] - b[i+1];
 
  944        c[i+2] = a[i+2] - b[i+2];
 
  945        c[i+3] = a[i+3] - b[i+3];
 
  951    for (j = 0; j < len; j++)
 
  952        c[j] = ccsubf(a[j], b[j]);
 
 
  967    saf_assert(a!=c && b!=c, 
"In-place operation is not supported.");
 
  971#if defined(SAF_USE_INTEL_IPP) 
  972    ippsSub_64f((Ipp64f*)b, (Ipp64f*)a, (Ipp64f*)c, len);  
 
  973#elif defined(SAF_USE_APPLE_ACCELERATE) 
  974    vDSP_vsubD(b, 1, a, 1, c, 1, (vDSP_Length)len);        
 
  975#elif defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
  977#elif defined(SAF_ENABLE_SIMD) 
  980# if defined(__AVX512F__) 
  981    for(; i<(len-7); i+=8)
 
  982        _mm512_storeu_pd(c+i, _mm512_sub_pd(_mm512_loadu_pd(a+i), _mm512_loadu_pd(b+i)));
 
  984# if defined(__AVX__) && defined(__AVX2__) 
  985    for(; i<(len-3); i+=4)
 
  986        _mm256_storeu_pd(c+i, _mm256_sub_pd(_mm256_loadu_pd(a+i), _mm256_loadu_pd(b+i)));
 
  988# if defined(__SSE__) && defined(__SSE2__) && defined(__SSE3__) 
  989    for(; i<(len-1); i+=2)
 
  990        _mm_storeu_pd(c+i, _mm_sub_pd(_mm_loadu_pd(a+i), _mm_loadu_pd(b+i)));
 
  996    for (j = 0; j < len; j++)
 
 
 1003    const double_complex* a,
 
 1004    const double_complex* b,
 
 1012    saf_assert(a!=c && b!=c, 
"In-place operation is not supported.");
 
 1016#if defined(SAF_USE_INTEL_IPP) 
 1017    ippsSub_64fc((Ipp64fc*)b, (Ipp64fc*)a, (Ipp64fc*)c, len);                             
 
 1018#elif defined(SAF_USE_APPLE_ACCELERATE) 
 1019    vDSP_vsubD((
double*)b, 1, (
double*)a, 1, (
double*)c, 1, 2*(vDSP_Length)len); 
 
 1020#elif defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
 1022#elif defined(SAF_ENABLE_SIMD) 
 1024    double* sa, *sb, *sc;
 
 1026    sa = (
double*)a; sb = (
double*)b; sc = (
double*)c;
 
 1028# if defined(__AVX512F__) 
 1029    for(; i<(len2-7); i+=8)
 
 1030        _mm512_storeu_pd(sc+i, _mm512_sub_pd(_mm512_loadu_pd(sa+i), _mm512_loadu_pd(sb+i)));
 
 1032# if defined(__AVX__) && defined(__AVX2__) 
 1033    for(; i<(len2-3); i+=4)
 
 1034        _mm256_storeu_pd(sc+i, _mm256_sub_pd(_mm256_loadu_pd(sa+i), _mm256_loadu_pd(sb+i)));
 
 1036# if defined(__SSE__) && defined(__SSE2__) && defined(__SSE3__) 
 1037    for(; i<(len2-1); i+=2)
 
 1038        _mm_storeu_pd(sc+i, _mm_sub_pd(_mm_loadu_pd(sa+i), _mm_loadu_pd(sb+i)));
 
 1041        sc[i] = sa[i] - sb[i];
 
 1044    for (j = 0; j < len; j++)
 
 1045        c[j] = ccsub(a[j], b[j]);
 
 
 1064    saf_assert(c!=NULL, 
"'c' can no longer be NULL");
 
 1065    saf_assert(a!=c && b!=c, 
"In-place operation is no longer supported.");
 
 1069#if defined(SAF_USE_INTEL_IPP) 
 1070    ippsMul_32f((Ipp32f*)a, (Ipp32f*)b, (Ipp32f*)c, len);
 
 1071#elif defined(SAF_USE_APPLE_ACCELERATE) 
 1072    vDSP_vmul(a, 1, b, 1, c, 1, (vDSP_Length)len);
 
 1073#elif defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
 1075#elif defined(SAF_ENABLE_SIMD) 
 1078# if defined(__AVX512F__) 
 1079    for(; i<(len-15); i+=16)
 
 1080        _mm512_storeu_ps(c+i, _mm512_mul_ps(_mm512_loadu_ps(a+i), _mm512_loadu_ps(b+i)));
 
 1082# if defined(__AVX__) && defined(__AVX2__) 
 1083    for(; i<(len-7); i+=8)
 
 1084        _mm256_storeu_ps(c+i, _mm256_mul_ps(_mm256_loadu_ps(a+i), _mm256_loadu_ps(b+i)));
 
 1086# if defined(__SSE__) && defined(__SSE2__) && defined(__SSE3__) 
 1087    for(; i<(len-3); i+=4)
 
 1088        _mm_storeu_ps(c+i, _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i)));
 
 1092#elif defined(NDEBUG) 
 1095    for(i=0; i<len-3; i+=4){
 
 1097        c[i+1] = a[i+1] * b[i+1];
 
 1098        c[i+2] = a[i+2] * b[i+2];
 
 1099        c[i+3] = a[i+3] * b[i+3];
 
 1105    for (j = 0; j < len; j++)
 
 
 1112    const float_complex* a,
 
 1113    const float_complex* b,
 
 1120    saf_assert(c!=NULL, 
"'c' can no longer be NULL");
 
 1121    saf_assert(a!=c && b!=c, 
"In-place operation is no longer supported.");
 
 1125#if defined(SAF_USE_INTEL_IPP) 
 1126    ippsMul_32fc((Ipp32fc*)a, (Ipp32fc*)b, (Ipp32fc*)c, len);
 
 1127#elif defined(SAF_USE_APPLE_ACCELERATE)   
 1129    vDSP_vmul((
float*)a, 2, (
float*)b+1, 2, (
float*)c+1, 2, (vDSP_Length)len);
 
 1130    vDSP_vma((
float*)a+1, 2, (
float*)b, 2, (
float*)c+1, 2, (
float*)c, 2, (vDSP_Length)len); 
 
 1131    cblas_scopy(len, (
float*)c, 2, (
float*)c+1, 2); 
 
 1133    vDSP_vmmsb((
float*)a, 2, (
float*)b, 2, (
float*)a+1, 2, (
float*)b+1, 2, (
float*)c, 2, (vDSP_Length)len);
 
 1134#elif defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
 1136#elif defined(SAF_ENABLE_SIMD) 
 1138    float* sa, *sb, *sc;
 
 1139    sa = (
float*)a; sb = (
float*)b; sc = (
float*)c;
 
 1140# if (defined(__AVX__) && defined(__AVX2__)) || defined(__AVX512F__)  
 1141    __m256i permute_ri = _mm256_set_epi32(6, 7, 4, 5, 2, 3, 0, 1);
 
 1142    for(i=0; i<(len-3); i+=4){
 
 1144        __m256 src1 = _mm256_moveldup_ps(_mm256_loadu_ps(sa+2*i)); 
 
 1146        __m256 src2 = _mm256_loadu_ps(sb+2*i); 
 
 1148        __m256 tmp1 = _mm256_mul_ps(src1, src2);
 
 1150        __m256 b1 = _mm256_permutevar8x32_ps(src2, permute_ri);
 
 1152        src1 = _mm256_movehdup_ps(_mm256_loadu_ps(sa+2*i)); 
 
 1154        __m256 tmp2 = _mm256_mul_ps(src1, b1);
 
 1156        _mm256_storeu_ps(sc+2*i, _mm256_addsub_ps(tmp1, tmp2));
 
 1158# elif defined(__SSE__) && defined(__SSE2__) && defined(__SSE3__) 
 1159    for(i=0; i<(len-1); i+=2){
 
 1161        __m128 src1 = _mm_moveldup_ps(_mm_loadu_ps(sa+2*i)); 
 
 1163        __m128 src2 = _mm_loadu_ps(sb+2*i); 
 
 1165        __m128 tmp1 = _mm_mul_ps(src1, src2);
 
 1167        __m128 b1 = _mm_shuffle_ps(src2, src2, _MM_SHUFFLE(2, 3, 0, 1));
 
 1169        src1 = _mm_movehdup_ps(_mm_loadu_ps(sa+2*i)); 
 
 1171        __m128 tmp2 = _mm_mul_ps(src1, b1);
 
 1173        _mm_storeu_ps(sc+2*i, _mm_addsub_ps(tmp1, tmp2));
 
 1177        sc[2*i]   = sa[2*i] * sb[2*i]   - sa[2*i+1] * sb[2*i+1];
 
 1178        sc[2*i+1] = sa[2*i] * sb[2*i+1] + sa[2*i+1] * sb[2*i];
 
 1180#elif __STDC_VERSION__ >= 199901L && defined(NDEBUG) 
 1183    for(i=0; i<len-3; i+=4){
 
 1185        c[i+1] = a[i+1] * b[i+1];
 
 1186        c[i+2] = a[i+2] * b[i+2];
 
 1187        c[i+3] = a[i+3] * b[i+3];
 
 1191#elif __STDC_VERSION__ >= 199901L  
 1193    for (i = 0; i < len; i++)
 
 1197    for (i = 0; i < len; i++)
 
 1198        c[i] = ccmulf(a[i], b[i]);
 
 
 1215    c[0] = cblas_sdot (len, a, 1, b, 1);
 
 
 1220    const float_complex* a,
 
 1221    const float_complex* b,
 
 1230            cblas_cdotu_sub(len, a, 1, b, 1, c);
 
 1233            cblas_cdotc_sub(len, a, 1, b, 1, c);
 
 
 1251#if defined(SAF_USE_INTEL_IPP) 
 1253        ippsMulC_32f_I((Ipp32f)s[0], (Ipp32f*)a, len);
 
 1255        ippsMulC_32f((Ipp32f*)a, (Ipp32f)s[0], (Ipp32f*)c, len);
 
 1256#elif defined(SAF_USE_APPLE_ACCELERATE) 
 1258        cblas_sscal(len, s[0], a, 1);
 
 1260        vDSP_vsmul(a, 1, s, c, 1, (vDSP_Length)len);
 
 1263        cblas_sscal(len, s[0], a, 1);
 
 1266        cblas_sscal(len, s[0], c, 1);
 
 
 1274    const float_complex* s,
 
 1280        cblas_cscal(len, s, a, 1);
 
 1282        cblas_ccopy(len, a, 1, c, 1);
 
 1283        cblas_cscal(len, s, c, 1);
 
 
 1295#if defined(SAF_USE_INTEL_IPP) 
 1297        ippsMulC_64f_I((Ipp64f)s[0], (Ipp64f*)a, len);
 
 1299        ippsMulC_64f((Ipp64f*)a, (Ipp64f)s[0], (Ipp64f*)c, len);
 
 1300#elif defined(SAF_USE_APPLE_ACCELERATE) 
 1302        cblas_dscal(len, s[0], a, 1);
 
 1304        vDSP_vsmulD(a, 1, s, c, 1, (vDSP_Length)len);
 
 1307        cblas_dscal(len, s[0], a, 1);
 
 1310        cblas_dscal(len, s[0], c, 1);
 
 
 1318    const double_complex* s,
 
 1324        cblas_zscal(len, s, a, 1);
 
 1326        cblas_zcopy(len, a, 1, c, 1);
 
 1327        cblas_zscal(len, s, c, 1);
 
 
 1345        memset(c, 0, len*
sizeof(
float));
 
 1348#if defined(SAF_USE_INTEL_IPP) 
 1349    ippsDivC_32f((Ipp32f*)a, (Ipp32f)s[0], (Ipp32f*)c, len);
 
 1350#elif defined(SAF_USE_APPLE_ACCELERATE) 
 1351    vDSP_vsdiv(a, 1, s, c, 1, (vDSP_Length)len);
 
 1353    cblas_scopy(len, a, 1, c, 1);
 
 1354    cblas_sscal(len, 1.0f/s[0], c, 1);
 
 
 1371#if defined(SAF_USE_INTEL_IPP) 
 1372    ippsAddC_32f((Ipp32f*)a, (Ipp32f)s[0], (Ipp32f*)c, len);
 
 1373#elif defined(SAF_USE_APPLE_ACCELERATE) 
 1374    vDSP_vsadd(a, 1, s, c, 1, (vDSP_Length)len);
 
 1375#elif defined(SAF_ENABLE_SIMD) 
 1377# if defined(__AVX512F__) 
 1378    __m512 s16 = _mm512_set1_ps(s[0]);
 
 1379    for(i=0; i<(len-15); i+=16)
 
 1380        _mm512_storeu_ps(c+i, _mm512_add_ps(_mm512_loadu_ps(a+i), s16));
 
 1381# elif defined(__AVX__) && defined(__AVX2__) 
 1382    __m256 s8 = _mm256_set1_ps(s[0]);
 
 1383    for(i=0; i<(len-7); i+=8)
 
 1384        _mm256_storeu_ps(c+i, _mm256_add_ps(_mm256_loadu_ps(a+i), s8));
 
 1385# elif defined(__SSE__) && defined(__SSE2__) && defined(__SSE3__) 
 1386    __m128 s4 = _mm_load_ps1(s);
 
 1387    for(i=0; i<(len-3); i+=4)
 
 1388        _mm_storeu_ps(c+i, _mm_add_ps(_mm_loadu_ps(a+i), s4));
 
 1394    for(i=0; i<len; i++)
 
 
 1412#if defined(SAF_USE_INTEL_IPP) 
 1413    ippsSubC_32f((Ipp32f*)a, (Ipp32f)s[0], (Ipp32f*)c, len);
 
 1414#elif defined(SAF_USE_APPLE_ACCELERATE) 
 1417    vDSP_vsadd(a, 1, &inv_s, c, 1, (vDSP_Length)len);
 
 1418#elif defined(SAF_ENABLE_SIMD) 
 1420# if defined(__AVX512F__) 
 1421    __m512 s16 = _mm512_set1_ps(s[0]);
 
 1422    for(i=0; i<(len-15); i+=16)
 
 1423        _mm512_storeu_ps(c+i, _mm512_sub_ps(_mm512_loadu_ps(a+i), s16));
 
 1424# elif defined(__AVX__) && defined(__AVX2__) 
 1425    __m256 s8 = _mm256_set1_ps(s[0]);
 
 1426    for(i=0; i<(len-7); i+=8)
 
 1427        _mm256_storeu_ps(c+i, _mm256_sub_ps(_mm256_loadu_ps(a+i), s8));
 
 1428# elif defined(__SSE__) && defined(__SSE2__) && defined(__SSE3__) 
 1429    __m128 s4 = _mm_load_ps1(s);
 
 1430    for(i=0; i<(len-3); i+=4)
 
 1431        _mm_storeu_ps(c+i, _mm_sub_ps(_mm_loadu_ps(a+i), s4));
 
 1437    for(i=0; i<len; i++)
 
 
 1456#ifdef SAF_USE_APPLE_ACCELERATE  
 1458    vDSP_Length* inds_vDSP;
 
 1459    inds_vDSP = 
malloc1d(len*
sizeof(vDSP_Length));
 
 1460    for(i=0; i<len; i++)
 
 1461        inds_vDSP[i] = (vDSP_Length)(inds[i] + 1);
 
 1462    vDSP_vgathr(sv, inds_vDSP, 1, cv, 1, (vDSP_Length)len);
 
 1464#elif defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
 1470        for(i=0; i<len; i++)
 
 1472        cblas_sgthr(len, sv, cv, (
veclib_int*)inds_tmp);
 
 1475#elif defined(NDEBUG) 
 1477    for(i=0; i<len-3; i+=4){
 
 1478        cv[i] = sv[inds[i]];
 
 1479        cv[i+1] = sv[inds[i+1]];
 
 1480        cv[i+2] = sv[inds[i+2]];
 
 1481        cv[i+3] = sv[inds[i+3]];
 
 1484        cv[i] = sv[inds[i]];
 
 1486    for(i=0; i<len; i++)
 
 1487        cv[i] = sv[inds[i]];
 
 
 1493    const float_complex* sv,
 
 1500#if defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
 1506        for(i=0; i<len; i++)
 
 1508        cblas_cgthr(len, sv, cv, (
veclib_int*)inds_tmp);
 
 1511#elif defined(NDEBUG) 
 1513    for(i=0; i<len-3; i+=4){
 
 1514        cv[i] = sv[inds[i]];
 
 1515        cv[i+1] = sv[inds[i+1]];
 
 1516        cv[i+2] = sv[inds[i+2]];
 
 1517        cv[i+3] = sv[inds[i+3]];
 
 1520        cv[i] = sv[inds[i]];
 
 1522    for(i=0; i<len; i++)
 
 1523        cv[i] = sv[inds[i]];
 
 
 1536#if defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
 1542        for(i=0; i<len; i++)
 
 1544        cblas_dgthr(len, sv, cv, (
veclib_int*)inds_tmp);
 
 1547#elif defined(NDEBUG) 
 1549    for(i=0; i<len-3; i+=4){
 
 1550        cv[i] = sv[inds[i]];
 
 1551        cv[i+1] = sv[inds[i+1]];
 
 1552        cv[i+2] = sv[inds[i+2]];
 
 1553        cv[i+3] = sv[inds[i+3]];
 
 1556        cv[i] = sv[inds[i]];
 
 1558    for(i=0; i<len; i++)
 
 1559        cv[i] = sv[inds[i]];
 
 
 1565    const double_complex* sv,
 
 1572#if defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
 1578        for(i=0; i<len; i++)
 
 1580        cblas_zgthr(len, sv, cv, (
veclib_int*)inds_tmp);
 
 1583#elif defined(NDEBUG) 
 1585    for(i=0; i<len-3; i+=4){
 
 1586        cv[i] = sv[inds[i]];
 
 1587        cv[i+1] = sv[inds[i+1]];
 
 1588        cv[i+2] = sv[inds[i+2]];
 
 1589        cv[i+3] = sv[inds[i+3]];
 
 1592        cv[i] = sv[inds[i]];
 
 1594    for(i=0; i<len; i++)
 
 1595        cv[i] = sv[inds[i]];
 
 
 1605typedef struct _utility_ssvd_data {
 
 1606    int maxDim1, maxDim2;
 
 1608    float* a, *s, *u, *vt;
 
 
 1617    h->maxDim1 = maxDim1;
 
 1618    h->maxDim2 = maxDim2;
 
 1619    h->currentWorkSize = 0;
 
 1620    h->a = 
malloc1d(maxDim1*maxDim2*
sizeof(
float));
 
 1622    h->u = 
malloc1d(maxDim1*maxDim1*
sizeof(
float));
 
 1623    h->vt = 
malloc1d(maxDim2*maxDim2*
sizeof(
float));
 
 
 1661    m = dim1; n = dim2; lda = dim1; ldu = dim1; ldvt = dim2;
 
 1669        saf_assert(dim1<=h->maxDim1, 
"dim1 exceeds the maximum length specified");
 
 1670        saf_assert(dim2<=h->maxDim2, 
"dim2 exceeds the maximum length specified");
 
 1675    for(i=0; i<dim1; i++)
 
 1676        for(j=0; j<dim2; j++)
 
 1677            h->a[j*dim1+i] = A[i*dim2 +j];
 
 1681#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 1682    sgesvd_( 
"A", 
"A", &m, &n, h->a, &lda, h->s, h->u, &ldu, h->vt, &ldvt, &wkopt, &lwork, &info );
 
 1683#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 1685#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 1686    info = LAPACKE_sgesvd_work(CblasColMajor, 
'A', 
'A', m, n, h->a, lda, h->s, h->u, ldu, h->vt, ldvt, &wkopt, lwork);
 
 1689    if(lwork>h->currentWorkSize){
 
 1690        h->currentWorkSize = lwork;
 
 1691        h->work = 
realloc1d(h->work, h->currentWorkSize*
sizeof(
float));
 
 1695#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 1696    sgesvd_( 
"A", 
"A", &m, &n, h->a, &lda, h->s, h->u, &ldu, h->vt, &ldvt, h->work, &lwork, &info );
 
 1697#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 1699#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 1700    info = LAPACKE_sgesvd_work(CblasColMajor, 
'A', 
'A', m, n, h->a, lda, h->s, h->u, ldu, h->vt, ldvt, h->work, lwork);
 
 1706            memset(S, 0, dim1*dim2*
sizeof(
float));
 
 1708            memset(U, 0, dim1*dim1*
sizeof(
float));
 
 1710            memset(V, 0, dim2*dim2*
sizeof(
float));
 
 1712            memset(sing, 0, 
SAF_MIN(dim1, dim2)*
sizeof(
float));
 
 1717        saf_print_warning(
"Could not compute SVD in utility_ssvd(). Output matrices/vectors have been zeroed.");
 
 1723            memset(S, 0, dim1*dim2*
sizeof(
float));
 
 1725            for(i=0; i<
SAF_MIN(dim1, dim2); i++)
 
 1726                S[i*dim2+i] = h->s[i];
 
 1731            for(i=0; i<dim1; i++)
 
 1732                for(j=0; j<dim1; j++)
 
 1733                    U[i*dim1+j] = h->u[j*dim1+i];
 
 1737            for(i=0; i<dim2; i++)
 
 1738                for(j=0; j<dim2; j++)
 
 1739                    V[i*dim2+j] = h->vt[i*dim2+j];
 
 1741            for(i=0; i<
SAF_MIN(dim1, dim2); i++)
 
 
 1750typedef struct _utility_csvd_data {
 
 1751    int maxDim1, maxDim2;
 
 1753    float_complex* a, *u, *vt, *work;
 
 
 1762    h->maxDim1 = maxDim1;
 
 1763    h->maxDim2 = maxDim2;
 
 1764    h->currentWorkSize = 0;
 
 1765    h->a = 
malloc1d(maxDim1*maxDim2*
sizeof(float_complex));
 
 1767    h->u = 
malloc1d(maxDim1*maxDim1*
sizeof(float_complex));
 
 1768    h->vt = 
malloc1d(maxDim2*maxDim2*
sizeof(float_complex));
 
 
 1794    const float_complex* A,
 
 1806    float_complex wkopt;
 
 1807#if defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
 1808    const MKL_Complex8 calpha = {1.0f, 0.0f};
 
 1813    m = dim1; n = dim2; lda = dim1; ldu = dim1; ldvt = dim2;
 
 1821        saf_assert(dim1<=h->maxDim1, 
"dim1 exceeds the maximum length specified");
 
 1822        saf_assert(dim2<=h->maxDim2, 
"dim2 exceeds the maximum length specified");
 
 1827#if defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
 1830    for(i=0; i<dim1; i++)
 
 1831        for(j=0; j<dim2; j++)
 
 1832            h->a[j*dim1+i] = A[i*dim2+j];
 
 1837#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 1840#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 1842#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 1843    info = LAPACKE_cgesvd_work(CblasColMajor, 
'A', 
'A', m, n, (
veclib_float_complex*)h->a, lda, h->s, (
veclib_float_complex*)h->u, ldu,
 
 1847    if(lwork>h->currentWorkSize){
 
 1848        h->currentWorkSize = lwork;
 
 1849        h->work = 
realloc1d(h->work, h->currentWorkSize*
sizeof(float_complex));
 
 1853#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 1854    cgesvd_( 
"A", 
"A", &m, &n, (
veclib_float_complex*)h->a, &lda, h->s, (
veclib_float_complex*)h->u, &ldu, (
veclib_float_complex*)h->vt, &ldvt,
 
 1856#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 1858#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 1859    info = LAPACKE_cgesvd_work(CblasColMajor, 
'A', 
'A', m, n, (
veclib_float_complex*)h->a, lda, h->s, (
veclib_float_complex*)h->u, ldu, (
veclib_float_complex*)h->vt, ldvt,
 
 1866            memset(S, 0, dim1*dim2*
sizeof(float_complex));
 
 1868            memset(U, 0, dim1*dim1*
sizeof(float_complex));
 
 1870            memset(V, 0, dim2*dim2*
sizeof(float_complex));
 
 1872            memset(sing, 0, 
SAF_MIN(dim1, dim2)*
sizeof(float_complex));
 
 1877        saf_print_warning(
"Could not compute SVD in utility_csvd(). Output matrices/vectors have been zeroed.");
 
 1884            memset(S, 0, dim1*dim2*
sizeof(float_complex));
 
 1885            cblas_scopy(
SAF_MIN(dim1, dim2), h->s, 1, (
float*)S, 2*(dim2+1));
 
 1890#if defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
 1893            for(i=0; i<dim1; i++)
 
 1894                for(j=0; j<dim1; j++)
 
 1895                    U[i*dim1+j] = h->u[j*dim1+i];
 
 1901            cblas_ccopy(dim2*dim2, h->vt, 1, V, 1);
 
 1902            cblas_sscal(dim2*dim2, -1.0f, ((
float*)V)+1, 2); 
 
 1906            cblas_scopy(
SAF_MIN(dim1, dim2), h->s, 1, sing, 1);
 
 
 1919typedef struct _utility_sseig_data {
 
 
 1933    h->currentWorkSize = 0;
 
 1934    h->w = 
malloc1d(maxDim*
sizeof(
float));
 
 1935    h->a = 
malloc1d(maxDim*maxDim*
sizeof(
float));
 
 
 1979        saf_assert(dim<=h->maxDim, 
"dim exceeds the maximum length specified");
 
 1984    for(i=0; i<dim; i++)
 
 1985        for(j=0; j<dim; j++)
 
 1986            h->a[i*dim+j] = A[j*dim+i];
 
 1990#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 1991    ssyev_( 
"Vectors", 
"Upper", &n, h->a, &lda, h->w, &wkopt, &lwork, &info );
 
 1992#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 1994#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 1995    info = LAPACKE_ssyev_work(CblasColMajor, 
'V', 
'U', n, h->a, lda, h->w, &wkopt, lwork);
 
 1998    if(lwork>h->currentWorkSize){
 
 1999        h->currentWorkSize = lwork;
 
 2000        h->work = 
realloc1d(h->work, h->currentWorkSize*
sizeof(
float));
 
 2004#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 2005    ssyev_( 
"Vectors", 
"Upper", &n, h->a, &lda, h->w, h->work, &lwork, &info );
 
 2006#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 2008#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 2009    info = LAPACKE_ssyev_work(CblasColMajor, 
'V', 
'U', n, h->a, lda, h->w, h->work, lwork);
 
 2014        memset(D, 0, dim*dim*
sizeof(
float));
 
 2018            memset(V, 0, dim*dim*
sizeof(
float));
 
 2024        saf_print_warning(
"Could not compute EVD in utility_sseig(). Output matrices/vectors have been zeroed.");
 
 2029            for(i=0; i<dim; i++){
 
 2031                    for(j=0; j<dim; j++)
 
 2032                        V[i*dim+j] = h->a[(dim-j-1)*dim+i]; 
 
 2034                    D[i*dim+i] = h->w[dim-i-1]; 
 
 2036                    eig[i] = h->w[dim-i-1];
 
 2040            for(i=0; i<dim; i++){
 
 2042                    for(j=0; j<dim; j++)
 
 2043                        V[i*dim+j] = h->a[j*dim+i]; 
 
 2045                    D[i*dim+i] = h->w[i]; 
 
 
 2057typedef struct _utility_cseig_data {
 
 2063    float_complex* work;
 
 
 2072    h->currentWorkSize = 
SAF_MAX(1, 2*maxDim-1);
 
 2073    h->rwork = 
malloc1d((3*maxDim-2)*
sizeof(
float));
 
 2074    h->w = 
malloc1d(maxDim*
sizeof(
float));
 
 2075    h->a = 
malloc1d(maxDim*maxDim*
sizeof(float_complex));
 
 2076    h->work = 
malloc1d(h->currentWorkSize*
sizeof(float_complex));
 
 
 2098    const float_complex* A,
 
 2109    float_complex wkopt;
 
 2110#if defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
 2111    const MKL_Complex8 calpha = {1.0f, 0.0f};
 
 2125        saf_assert(dim<=h->maxDim, 
"dim exceeds the maximum length specified");
 
 2130#if defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
 2133    for(i=0; i<dim; i++)
 
 2134        for(j=0; j<dim; j++)
 
 2135            h->a[i*dim+j] = A[j*dim+i];
 
 2140#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 2142#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 2144#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 2148    if(lwork>h->currentWorkSize){
 
 2149        h->currentWorkSize = lwork;
 
 2150        h->work = 
realloc1d(h->work, h->currentWorkSize*
sizeof(float_complex));
 
 2154#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 2156#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 2158#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 2164        memset(D, 0, dim*dim*
sizeof(float_complex));
 
 2168            memset(V, 0, dim*dim*
sizeof(float_complex));
 
 2174        saf_print_warning(
"Could not compute EVD in utility_cseig(). Output matrices/vectors have been zeroed.");
 
 2180        if(sortDecFLAG && V!=NULL)
 
 2181            for(i=0; i<(int)((float)dim/2.0f); i++)
 
 2182                cblas_cswap(dim, &h->a[i*dim], 1, &h->a[(dim-i-1)*dim], 1);
 
 2185#if defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
 2188            for(i=0; i<dim; i++)
 
 2189                for(j=0; j<dim; j++)
 
 2190                    V[i*dim+j] = h->a[j*dim+i];
 
 2195            for(i=0; i<dim; i++) {
 
 2197                    D[i*dim+i] = cmplxf(h->w[dim-i-1], 0.0f); 
 
 2199                    eig[i] = h->w[dim-i-1];
 
 2203            for(i=0; i<dim; i++){
 
 2205                    D[i*dim+i] = cmplxf(h->w[i], 0.0f); 
 
 
 2222typedef struct _utility_ceigmp_data {
 
 2225    float_complex* a, *b, *vl, *vr, *alpha, *beta;
 
 2227    float_complex* work;
 
 
 2236    h->currentWorkSize = 4*maxDim;
 
 2237    h->rwork = 
malloc1d(4*(h->currentWorkSize)*
sizeof(
float));
 
 2238    h->a = 
malloc1d(maxDim*maxDim*
sizeof(float_complex));
 
 2239    h->b = 
malloc1d(maxDim*maxDim*
sizeof(float_complex));
 
 2240    h->vl = 
malloc1d(maxDim*maxDim*
sizeof(float_complex));
 
 2241    h->vr = 
malloc1d(maxDim*maxDim*
sizeof(float_complex));
 
 2242    h->alpha = 
malloc1d(maxDim*
sizeof(float_complex));
 
 2243    h->beta = 
malloc1d(maxDim*
sizeof(float_complex));
 
 2244    h->work = 
malloc1d(h->currentWorkSize*
sizeof(float_complex));
 
 
 2270    const float_complex* A,
 
 2271    const float_complex* B,
 
 2283    n = lda = ldb = ldvl = ldvr = dim;
 
 2291        saf_assert(dim<=h->maxDim, 
"dim exceeds the maximum length specified");
 
 2296    for(i=0; i<dim; i++)
 
 2297        for(j=0; j<dim; j++)
 
 2298            h->a[j*dim+i] = A[i*dim+j];
 
 2299    for(i=0; i<dim; i++)
 
 2300        for(j=0; j<dim; j++)
 
 2301            h->b[j*dim+i] = B[i*dim+j];
 
 2304    lwork = h->currentWorkSize;
 
 2305#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 2308#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 2310#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 2311    info = LAPACKE_cggev_work(CblasColMajor, 
'V', 
'V', n, (
veclib_float_complex*)h->a, lda, (
veclib_float_complex*)h->b, ldb, (
veclib_float_complex*)h->alpha, (
veclib_float_complex*)h->beta,
 
 2316        memset(D, 0, dim*dim*
sizeof(float_complex));
 
 2321            memset(VL, 0, dim*dim*
sizeof(float_complex));
 
 2323            memset(VR, 0, dim*dim*
sizeof(float_complex));
 
 2329        saf_print_warning(
"Could not compute EVD in utility_ceigmp(). Output matrices/vectors have been zeroed.");
 
 2335            for(i=0; i<dim; i++)
 
 2336                D[i*dim+i] = ccdivf(h->alpha[i],h->beta[i]);
 
 2339            for(i=0; i<dim; i++)
 
 2340                for(j=0; j<dim; j++)
 
 2341                    VL[i*dim+j] = h->vl[j*dim+i];
 
 2343            for(i=0; i<dim; i++)
 
 2344                for(j=0; j<dim; j++)
 
 2345                    VR[i*dim+j] = h->vr[j*dim+i];
 
 
 2353typedef struct _utility_zeigmp_data {
 
 2356    double_complex* a, *b, *vl, *vr, *alpha, *beta;
 
 2358    double_complex* work;
 
 
 2367    h->currentWorkSize = 4*maxDim;
 
 2368    h->rwork = 
malloc1d(4*(h->currentWorkSize)*
sizeof(
double));
 
 2369    h->a = 
malloc1d(maxDim*maxDim*
sizeof(double_complex));
 
 2370    h->b = 
malloc1d(maxDim*maxDim*
sizeof(double_complex));
 
 2371    h->vl = 
malloc1d(maxDim*maxDim*
sizeof(double_complex));
 
 2372    h->vr = 
malloc1d(maxDim*maxDim*
sizeof(double_complex));
 
 2373    h->alpha = 
malloc1d(maxDim*
sizeof(double_complex));
 
 2374    h->beta = 
malloc1d(maxDim*
sizeof(double_complex));
 
 2375    h->work = 
malloc1d(h->currentWorkSize*
sizeof(double_complex));
 
 
 2401    const double_complex* A,
 
 2402    const double_complex* B,
 
 2414    n = lda = ldb = ldvl = ldvr = dim;
 
 2422        saf_assert(dim<=h->maxDim, 
"dim exceeds the maximum length specified");
 
 2427    for(i=0; i<dim; i++)
 
 2428        for(j=0; j<dim; j++)
 
 2429            h->a[j*dim+i] = A[i*dim+j];
 
 2430    for(i=0; i<dim; i++)
 
 2431        for(j=0; j<dim; j++)
 
 2432            h->b[j*dim+i] = B[i*dim+j]; 
 
 2435    lwork = h->currentWorkSize;
 
 2436#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 2439#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 2441#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 2442    info = LAPACKE_zggev_work(CblasColMajor, 
'V', 
'V', n, (
veclib_double_complex*)h->a, lda, (
veclib_double_complex*)h->b, ldb, (
veclib_double_complex*)h->alpha, (
veclib_double_complex*)h->beta,
 
 2447        memset(D, 0, dim*dim*
sizeof(double_complex));
 
 2452            memset(VL, 0, dim*dim*
sizeof(double_complex));
 
 2454            memset(VR, 0, dim*dim*
sizeof(double_complex));
 
 2460        saf_print_warning(
"Could not compute EVD in utility_zeigmp(). Output matrices/vectors have been zeroed.");
 
 2466            for(i=0; i<dim; i++)
 
 2467                D[i*dim+i] = ccdiv(h->alpha[i],h->beta[i]);
 
 2470            for(i=0; i<dim; i++)
 
 2471                for(j=0; j<dim; j++)
 
 2472                    VL[i*dim+j] = h->vl[j*dim+i];
 
 2474            for(i=0; i<dim; i++)
 
 2475                for(j=0; j<dim; j++)
 
 2476                    VR[i*dim+j] = h->vr[j*dim+i];
 
 
 2489typedef struct _utility_ceig_data {
 
 2492    float_complex *w, *vl, *vr, *a;
 
 2494    float_complex* work;
 
 
 2503    h->currentWorkSize = 0;
 
 2504    h->rwork = 
malloc1d(4*maxDim*
sizeof(
float));
 
 2505    h->w = 
malloc1d(maxDim*
sizeof(float_complex));
 
 2506    h->vl = 
malloc1d(maxDim*maxDim*
sizeof(float_complex));
 
 2507    h->vr = 
malloc1d(maxDim*maxDim*
sizeof(float_complex));
 
 2508    h->a = 
malloc1d(maxDim*maxDim*
sizeof(float_complex));
 
 
 2533    const float_complex* A,
 
 2544    float_complex wkopt;
 
 2546    n = lda = ldvl = ldvr = dim;
 
 2554        saf_assert(dim<=h->maxDim, 
"dim exceeds the maximum length specified");
 
 2559    for(i=0; i<dim; i++)
 
 2560        for(j=0; j<dim; j++)
 
 2561            h->a[i*dim+j] = A[j*dim+i];
 
 2565#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 2568#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 2570#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 2575    if(lwork>h->currentWorkSize){
 
 2576        h->currentWorkSize = lwork;
 
 2577        h->work = 
realloc1d(h->work, h->currentWorkSize*
sizeof(float_complex));
 
 2581#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 2584#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 2586#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 2593        memset(D, 0, dim*dim*
sizeof(float_complex));
 
 2598            memset(VL, 0, dim*dim*
sizeof(float_complex));
 
 2600            memset(VR, 0, dim*dim*
sizeof(float_complex));
 
 2602            memset(eig, 0, dim*
sizeof(float_complex));
 
 2608        saf_print_warning(
"Could not compute EVD in utility_ceig(). Output matrices/vectors have been zeroed.");
 
 2613        for(i=0; i<dim; i++){
 
 2615                for(j=0; j<dim; j++)
 
 2616                    VL[i*dim+j] = h->vl[j*dim+i];
 
 2618                for(j=0; j<dim; j++)
 
 2619                    VR[i*dim+j] = h->vr[j*dim+i];
 
 2621                D[i*dim+i] = h->w[i]; 
 
 
 2632typedef struct _utility_zeig_data {
 
 2635    double_complex *w, *vl, *vr, *a;
 
 2637    double_complex* work;
 
 
 2646    h->currentWorkSize = 0;
 
 2647    h->rwork = 
malloc1d(4*maxDim*
sizeof(
double));
 
 2648    h->w = 
malloc1d(maxDim*
sizeof(double_complex));
 
 2649    h->vl = 
malloc1d(maxDim*maxDim*
sizeof(double_complex));
 
 2650    h->vr = 
malloc1d(maxDim*maxDim*
sizeof(double_complex));
 
 2651    h->a = 
malloc1d(maxDim*maxDim*
sizeof(double_complex));
 
 
 2676    const double_complex* A,
 
 2687    double_complex wkopt;
 
 2689    n = lda = ldvl = ldvr = dim;
 
 2697        saf_assert(dim<=h->maxDim, 
"dim exceeds the maximum length specified");
 
 2702    for(i=0; i<dim; i++)
 
 2703        for(j=0; j<dim; j++)
 
 2704            h->a[i*dim+j] = A[j*dim+i];
 
 2708#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 2711#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 2713#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 2718    if(lwork>h->currentWorkSize){
 
 2719        h->currentWorkSize = lwork;
 
 2720        h->work = 
realloc1d(h->work, h->currentWorkSize*
sizeof(double_complex));
 
 2724#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 2727#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 2729#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 2736        memset(D, 0, dim*dim*
sizeof(double_complex));
 
 2741            memset(VL, 0, dim*dim*
sizeof(double_complex));
 
 2743            memset(VR, 0, dim*dim*
sizeof(double_complex));
 
 2745            memset(eig, 0, dim*
sizeof(double_complex));
 
 2751        saf_print_warning(
"Could not compute EVD in utility_zeig(). Output matrices/vectors have been zeroed.");
 
 2756        for(i=0; i<dim; i++){
 
 2758                for(j=0; j<dim; j++)
 
 2759                    VL[i*dim+j] = h->vl[j*dim+i];
 
 2761                for(j=0; j<dim; j++)
 
 2762                    VR[i*dim+j] = h->vr[j*dim+i];
 
 2764                D[i*dim+i] = h->w[i]; 
 
 
 2780typedef struct _utility_sglslv_data {
 
 
 2794    h->maxNCol = maxNCol;
 
 2797    h->a = 
malloc1d(maxDim*maxDim*
sizeof(
float));
 
 2798    h->b = 
malloc1d(maxDim*maxNCol*
sizeof(
float));
 
 
 2827    veclib_int n = dim, nrhs = nCol, lda = dim, ldb = dim, info;
 
 2828#if !defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) && !(defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64)) 
 2838        saf_assert(dim<=h->maxDim, 
"dim exceeds the maximum length specified");
 
 2839        saf_assert(nCol<=h->maxNCol, 
"nCol exceeds the maximum length specified");
 
 2843#if defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 2845    cblas_scopy(dim*dim, A, 1, h->a, 1);
 
 2846    cblas_scopy(dim*nCol, B, 1, h->b, 1);
 
 2849# if defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
 2850    MKL_Somatcopy(
'R', 
'T', dim, dim, 1.0f, A, dim, h->a, dim);
 
 2851    MKL_Somatcopy(
'R', 
'T', dim, nCol, 1.0f, B, nCol, h->b, dim);
 
 2853    for(i=0; i<dim; i++)
 
 2854        for(j=0; j<dim; j++)
 
 2855            h->a[i*dim+j] = A[j*dim+i];
 
 2856    for(i=0; i<dim; i++)
 
 2857        for(j=0; j<nCol; j++)
 
 2858            h->b[j*dim+i] = B[i*nCol+j];
 
 2863#ifdef SAF_VECLIB_USE_CLAPACK_INTERFACE 
 2864    info = clapack_sgesv(CblasColMajor, n, nrhs, h->a, lda, h->IPIV, h->b, ldb);
 
 2865#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 2866    info = LAPACKE_sgesv_work(CblasRowMajor, n, nrhs, h->a, lda, h->IPIV, h->b, ldb);
 
 2867#elif defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 2868    sgesv_( &n, &nrhs, h->a, &lda, h->IPIV, h->b, &ldb, &info );
 
 2873        memset(X, 0, dim*nCol*
sizeof(
float));
 
 2878        saf_print_warning(
"Could not solve the linear equation in utility_sglslv(). Output matrices/vectors have been zeroed.");
 
 2882#if defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 2883        cblas_scopy(dim*nCol, h->b, 1, X, 1);
 
 2886# if defined(SAF_USE_INTEL_MKL_LP64) || defined(SAF_USE_INTEL_MKL_ILP64) 
 2887        MKL_Somatcopy(
'R', 
'T', nCol, dim, 1.0f, h->b, dim, X, nCol);
 
 2889        for(i=0; i<dim; i++)
 
 2890            for(j=0; j<nCol; j++)
 
 2891                X[i*nCol+j] = h->b[j*dim+i];
 
 
 2901typedef struct _utility_cglslv_data {
 
 
 2915    h->maxNCol = maxNCol;
 
 2918    h->a = 
malloc1d(maxDim*maxDim*
sizeof(float_complex));
 
 2919    h->b = 
malloc1d(maxDim*maxNCol*
sizeof(float_complex));
 
 
 2940    const float_complex* A,
 
 2961        saf_assert(dim<=h->maxDim, 
"dim exceeds the maximum length specified");
 
 2962        saf_assert(nCol<=h->maxNCol, 
"nCol exceeds the maximum length specified");
 
 2967    for(i=0; i<dim; i++)
 
 2968        for(j=0; j<dim; j++)
 
 2969            h->a[j*dim+i] = A[i*dim+j];
 
 2970    for(i=0; i<dim; i++)
 
 2971        for(j=0; j<nCol; j++)
 
 2972            h->b[j*dim+i] = B[i*nCol+j];
 
 2975#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 2977#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 2979#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 2985        memset(X, 0, dim*nCol*
sizeof(float_complex));
 
 2990        saf_print_warning(
"Could not solve the linear equation in utility_cglslv(). Output matrices/vectors have been zeroed.");
 
 2995        for(i=0; i<dim; i++)
 
 2996            for(j=0; j<nCol; j++)
 
 2997                X[i*nCol+j] = h->b[j*dim+i];
 
 
 3005typedef struct _utility_dglslv_data {
 
 
 3019    h->maxNCol = maxNCol;
 
 3021    h->a = 
malloc1d(maxDim*maxDim*
sizeof(
double));
 
 3022    h->b = 
malloc1d(maxDim*maxNCol*
sizeof(
double));
 
 
 3064        saf_assert(dim<=h->maxDim, 
"dim exceeds the maximum length specified");
 
 3065        saf_assert(nCol<=h->maxNCol, 
"nCol exceeds the maximum length specified");
 
 3070    for(i=0; i<dim; i++)
 
 3071        for(j=0; j<dim; j++)
 
 3072            h->a[j*dim+i] = A[i*dim+j];
 
 3073    for(i=0; i<dim; i++)
 
 3074        for(j=0; j<nCol; j++)
 
 3075            h->b[j*dim+i] = B[i*nCol+j];
 
 3078#ifdef SAF_VECLIB_USE_CLAPACK_INTERFACE 
 3079    info = clapack_dgesv(CblasColMajor, n, nrhs, h->a, lda, h->IPIV, h->b, ldb);
 
 3080#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 3081    info = LAPACKE_dgesv_work(CblasColMajor, n, nrhs, h->a, lda, h->IPIV, h->b, ldb);
 
 3082#elif defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 3083    dgesv_( &n, &nrhs, h->a, &lda, h->IPIV, h->b, &ldb, &info );
 
 3088        memset(X, 0, dim*nCol*
sizeof(
double));
 
 3093        saf_print_warning(
"Could not solve the linear equation in utility_dglslv(). Output matrices/vectors have been zeroed.");
 
 3098        for(i=0; i<dim; i++)
 
 3099            for(j=0; j<nCol; j++)
 
 3100                X[i*nCol+j] = h->b[j*dim+i];
 
 
 3108typedef struct _utility_zglslv_data {
 
 
 3122    h->maxNCol = maxNCol; 
 
 3124    h->a = 
malloc1d(maxDim*maxDim*
sizeof(double_complex));
 
 3125    h->b = 
malloc1d(maxDim*maxNCol*
sizeof(double_complex));
 
 
 3146    const double_complex* A,
 
 3154    veclib_int i, j, n = dim, nrhs = nCol, lda = dim, ldb = dim, info;
 
 3167        saf_assert(dim<=h->maxDim, 
"dim exceeds the maximum length specified");
 
 3168        saf_assert(nCol<=h->maxNCol, 
"nCol exceeds the maximum length specified");
 
 3173    for(i=0; i<dim; i++)
 
 3174        for(j=0; j<dim; j++)
 
 3175            h->a[j*dim+i] = A[i*dim+j];
 
 3176    for(i=0; i<dim; i++)
 
 3177        for(j=0; j<nCol; j++)
 
 3178            h->b[j*dim+i] = B[i*nCol+j];
 
 3181#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 3183#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 3185#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 3191        memset(X, 0, dim*nCol*
sizeof(double_complex));
 
 3196        saf_print_warning(
"Could not solve the linear equation in utility_zglslv(). Output matrices/vectors have been zeroed.");
 
 3201        for(i=0; i<dim; i++)
 
 3202            for(j=0; j<nCol; j++)
 
 3203                X[i*nCol+j] = h->b[j*dim+i];
 
 
 3216typedef struct _utility_sglslvt_data {
 
 
 3230    h->maxNCol = maxNCol;
 
 3232    h->a = 
malloc1d(maxDim*maxDim*
sizeof(
float));
 
 3233    h->b = 
malloc1d(maxDim*maxNCol*
sizeof(
float));
 
 
 3262    veclib_int n = nCol, nrhs = dim, lda = nCol, ldb = nCol, info;
 
 3270        saf_assert(dim<=h->maxDim, 
"dim exceeds the maximum length specified");
 
 3271        saf_assert(nCol<=h->maxNCol, 
"nCol exceeds the maximum length specified");
 
 3276    cblas_scopy(dim*dim, A, 1, h->a, 1);
 
 3277    cblas_scopy(dim*nCol, B, 1, h->b, 1);
 
 3280#ifdef SAF_VECLIB_USE_CLAPACK_INTERFACE 
 3281    info = clapack_sgesv(CblasColMajor, n, nrhs, h->b, ldb, h->IPIV, h->a, lda);
 
 3282#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 3283    info = LAPACKE_sgesv_work(CblasColMajor, n, nrhs, h->b, ldb, h->IPIV, h->a, lda);
 
 3284#elif defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 3286    sgesv_( &n, &nrhs, h->b, &ldb, h->IPIV, h->a, &lda, &info );
 
 3291        memset(X, 0, dim*nCol*
sizeof(
float));
 
 3296        saf_print_warning(
"Could not solve the linear equation in utility_sglslvt(). Output matrices/vectors have been zeroed.");
 
 3300        cblas_scopy(dim*nCol, h->a, 1, X, 1);
 
 
 3312typedef struct _utility_sslslv_data {
 
 
 3325    h->maxNCol = maxNCol;
 
 3326    h->a = 
malloc1d(maxDim*maxDim*
sizeof(
float));
 
 3327    h->b = 
malloc1d(maxDim*maxNCol*
sizeof(
float));
 
 
 3368        saf_assert(dim<=h->maxDim, 
"dim exceeds the maximum length specified");
 
 3369        saf_assert(nCol<=h->maxNCol, 
"nCol exceeds the maximum length specified");
 
 3374    for(i=0; i<dim; i++)
 
 3375        for(j=0; j<dim; j++)
 
 3376            h->a[j*dim+i] = A[i*dim+j];
 
 3377    for(i=0; i<dim; i++)
 
 3378        for(j=0; j<nCol; j++)
 
 3379            h->b[j*dim+i] = B[i*nCol+j];
 
 3382#ifdef SAF_VECLIB_USE_CLAPACK_INTERFACE 
 3383    info = clapack_sposv(CblasColMajor, CblasUpper, n, nrhs, h->a, lda, h->b, ldb);
 
 3384#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 3385    info = LAPACKE_sposv_work(CblasColMajor, CblasUpper, n, nrhs, h->a, lda, h->b, ldb);
 
 3386#elif defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 3387    sposv_( 
"U", &n, &nrhs, h->a, &lda, h->b, &ldb, &info );
 
 3392        memset(X, 0, dim*nCol*
sizeof(
float));
 
 3397        saf_print_warning(
"Could not solve the linear equation in utility_sslslv(). Output matrices/vectors have been zeroed.");
 
 3402        for(i=0; i<dim; i++)
 
 3403            for(j=0; j<nCol; j++)
 
 3404                X[i*nCol+j] = h->b[j*dim+i];
 
 
 3412typedef struct _utility_cslslv_data {
 
 
 3425    h->maxNCol = maxNCol;
 
 3426    h->a = 
malloc1d(maxDim*maxDim*
sizeof(float_complex));
 
 3427    h->b = 
malloc1d(maxDim*maxNCol*
sizeof(float_complex));
 
 
 3447    const float_complex* A,
 
 3468        saf_assert(dim<=h->maxDim, 
"dim exceeds the maximum length specified");
 
 3469        saf_assert(nCol<=h->maxNCol, 
"nCol exceeds the maximum length specified");
 
 3474    for(i=0; i<dim; i++)
 
 3475        for(j=0; j<dim; j++)
 
 3476            h->a[j*dim+i] = A[i*dim+j];
 
 3477    for(i=0; i<dim; i++)
 
 3478        for(j=0; j<nCol; j++)
 
 3479    h->b[j*dim+i] = B[i*nCol+j];
 
 3482#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 3484#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 3486#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 3492        memset(X, 0, dim*nCol*
sizeof(float_complex));
 
 3497        saf_print_warning(
"Could not solve the linear equation in utility_cslslv(). Output matrices/vectors have been zeroed.");
 
 3502        for(i=0; i<dim; i++)
 
 3503            for(j=0; j<nCol; j++)
 
 3504                X[i*nCol+j] = h->b[j*dim+i];
 
 
 3517typedef struct _utility_spinv_data {
 
 3518    int maxDim1, maxDim2;
 
 3520    float* a, *s, *u, *vt, *inva;
 
 
 3529    h->maxDim1 = maxDim1;
 
 3530    h->maxDim2 = maxDim2;
 
 3531    h->currentWorkSize = 0;
 
 3532    h->a = 
malloc1d(maxDim1*maxDim2*
sizeof(
float));
 
 3534    h->u = 
malloc1d(maxDim1*maxDim1*
sizeof(
float));
 
 3535    h->vt = 
malloc1d(maxDim2*maxDim2*
sizeof(
float));
 
 3536    h->inva = 
malloc1d(maxDim1*maxDim2*
sizeof(
float));
 
 
 3568    veclib_int i, j, m, n, k, lda, ldu, ldvt, ld_inva, info;
 
 3573    m = lda = ldu = dim1;
 
 3583        saf_assert(dim1<=h->maxDim1, 
"dim1 exceeds the maximum length specified");
 
 3584        saf_assert(dim2<=h->maxDim2, 
"dim2 exceeds the maximum length specified");
 
 3591            h->a[j*m+i] = inM[i*n+j];
 
 3595#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 3596    sgesvd_(
"S", 
"S", &m, &n, h->a, &lda, h->s, h->u, &ldu, h->vt, &ldvt, &wkopt, &lwork, &info);
 
 3597#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 3599#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 3600    info = LAPACKE_sgesvd_work(CblasColMajor, 
'S', 
'S', m, n, h->a, lda, h->s, h->u, ldu, h->vt, ldvt, &wkopt, lwork);
 
 3603    if(lwork>h->currentWorkSize){
 
 3604        h->currentWorkSize = lwork;
 
 3605        h->work = 
realloc1d(h->work, h->currentWorkSize*
sizeof(
float));
 
 3609#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 3610    sgesvd_(
"S", 
"S", &m, &n, h->a, &lda, h->s, h->u, &ldu, h->vt, &ldvt, h->work, &lwork, &info );
 
 3611#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 3613#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 3614    info = LAPACKE_sgesvd_work(CblasColMajor, 
'S', 
'S', m, n, h->a, lda, h->s, h->u, ldu, h->vt, ldvt, h->work, lwork);
 
 3618        memset(outM, 0, dim1*dim2*
sizeof(
float));
 
 3623        saf_print_warning(
"Could not compute SVD in utility_spinv(). Output matrices/vectors have been zeroed.");
 
 3628            if(h->s[i] > 1.0e-5f)
 
 3632            cblas_sscal(m, ss, &h->u[i*m], 1);
 
 3635        cblas_sgemm(CblasColMajor, CblasTrans, CblasTrans, n, m, k, 1.0f,
 
 3643                outM[j*m+i] = h->inva[i*n+j];
 
 
 3652typedef struct _utility_cpinv_data {
 
 3653    int maxDim1, maxDim2;
 
 3655    float_complex* a, *u, *vt, *inva;
 
 3657    float_complex* work;
 
 
 3665    h->maxDim1 = maxDim1;
 
 3666    h->maxDim2 = maxDim2;
 
 3667    h->currentWorkSize = 0;
 
 3668    h->a = 
malloc1d(maxDim1*maxDim2*
sizeof(float_complex));
 
 3670    h->u = 
malloc1d(maxDim1*maxDim1*
sizeof(float_complex));
 
 3671    h->vt = 
malloc1d(maxDim2*maxDim2*
sizeof(float_complex));
 
 3672    h->inva = 
malloc1d(maxDim1*maxDim2*
sizeof(float_complex));
 
 
 3698    const float_complex* inM,
 
 3705    veclib_int i, j, m, n, k, lda, ldu, ldvt, ld_inva, info;
 
 3706    float_complex  ss_cmplx;
 
 3707    const float_complex calpha = cmplxf(1.0f, 0.0f); 
const float_complex cbeta = cmplxf(0.0f, 0.0f); 
 
 3710    float_complex wkopt;
 
 3712    m = lda = ldu = dim1;
 
 3722        saf_assert(dim1<=h->maxDim1, 
"dim1 exceeds the maximum length specified");
 
 3723        saf_assert(dim2<=h->maxDim2, 
"dim2 exceeds the maximum length specified");
 
 3728    for(i=0; i<dim1; i++)
 
 3729        for(j=0; j<dim2; j++)
 
 3730            h->a[j*dim1+i] = inM[i*dim2 +j];
 
 3734#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 3735    cgesvd_( 
"A", 
"A", &m, &n, (
veclib_float_complex*)h->a, &lda, h->s, (
veclib_float_complex*)h->u, &ldu, (
veclib_float_complex*)h->vt, &ldvt,
 
 3737#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 3739#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 3740    info = LAPACKE_cgesvd_work(CblasColMajor, 
'S', 
'S', m, n, (
veclib_float_complex*)h->a, lda, h->s, (
veclib_float_complex*)h->u, ldu, (
veclib_float_complex*)h->vt, ldvt,
 
 3744    if(lwork>h->currentWorkSize){
 
 3745        h->currentWorkSize = lwork;
 
 3746        h->work = 
realloc1d(h->work, h->currentWorkSize*
sizeof(float_complex));
 
 3750#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 3751    cgesvd_( 
"A", 
"A", &m, &n, (
veclib_float_complex*)h->a, &lda, h->s, (
veclib_float_complex*)h->u, &ldu, (
veclib_float_complex*)h->vt, &ldvt,
 
 3753#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 3755#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 3756    info = LAPACKE_cgesvd_work(CblasColMajor, 
'S', 
'S', m, n, (
veclib_float_complex*)h->a, lda, h->s, (
veclib_float_complex*)h->u, ldu, (
veclib_float_complex*)h->vt, ldvt,
 
 3761        memset(outM, 0, dim1*dim2*
sizeof(float_complex));
 
 3766        saf_print_warning(
"Could not compute SVD in utility_cpinv(). Output matrices/vectors have been zeroed.");
 
 3771            if(h->s[i] > 1.0e-5f)
 
 3775            ss_cmplx = cmplxf(ss, 0.0f);
 
 3776            cblas_cscal(m, &ss_cmplx, &h->u[i*m], 1);
 
 3779        cblas_cgemm(CblasColMajor, CblasConjTrans, CblasConjTrans, n, m, k, &calpha,
 
 3787                outM[j*m+i] = h->inva[i*n+j];
 
 
 3796typedef struct _utility_dpinv_data {
 
 3797    int maxDim1, maxDim2;
 
 3799    double* a, *s, *u, *vt, *inva;
 
 
 3808    h->maxDim1 = maxDim1;
 
 3809    h->maxDim2 = maxDim2;
 
 3810    h->currentWorkSize = 0;
 
 3811    h->a = 
malloc1d(maxDim1*maxDim2*
sizeof(
double));
 
 3813    h->u = 
malloc1d(maxDim1*maxDim1*
sizeof(
double));
 
 3814    h->vt = 
malloc1d(maxDim2*maxDim2*
sizeof(
double));
 
 3815    h->inva = 
malloc1d(maxDim1*maxDim2*
sizeof(
double));
 
 
 3847    veclib_int i, j, m, n, k, lda, ldu, ldvt, ld_inva, info;
 
 3852    m = lda = ldu = dim1;
 
 3862        saf_assert(dim1<=h->maxDim1, 
"dim1 exceeds the maximum length specified");
 
 3863        saf_assert(dim2<=h->maxDim2, 
"dim2 exceeds the maximum length specified");
 
 3870            h->a[j*m+i] = inM[i*n+j];
 
 3874#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 3875    dgesvd_(
"S", 
"S", &m, &n, h->a, &lda, h->s, h->u, &ldu, h->vt, &ldvt, &wkopt, &lwork, &info);
 
 3876#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 3878#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 3879    info = LAPACKE_dgesvd_work(CblasColMajor, 
'S', 
'S', m, n, h->a, lda, h->s, h->u, ldu, h->vt, ldvt, &wkopt, lwork);
 
 3882    if(lwork>h->currentWorkSize){
 
 3883        h->currentWorkSize = lwork;
 
 3884        h->work = 
realloc1d(h->work, h->currentWorkSize*
sizeof(
double));
 
 3888#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 3889    dgesvd_(
"S", 
"S", &m, &n, h->a, &lda, h->s, h->u, &ldu, h->vt, &ldvt, h->work, &lwork, &info );
 
 3890#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 3892#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 3893    info = LAPACKE_dgesvd_work(CblasColMajor, 
'S', 
'S', m, n, h->a, lda, h->s, h->u, ldu, h->vt, ldvt, h->work, lwork);
 
 3897        memset(outM, 0, dim1*dim2*
sizeof(
double));
 
 3902        saf_print_warning(
"Could not compute SVD in utility_dpinv(). Output matrices/vectors have been zeroed.");
 
 3907            if(h->s[i] > 1.0e-9)
 
 3911            cblas_dscal(m, ss, &h->u[i*m], 1);
 
 3914        cblas_dgemm(CblasColMajor, CblasTrans, CblasTrans, n, m, k, 1.0,
 
 3922                outM[j*m+i] = h->inva[i*n+j];
 
 
 3931typedef struct _utility_zpinv_data {
 
 3932    int maxDim1, maxDim2;
 
 3934    double_complex* a, *u, *vt, *inva;
 
 3936    double_complex* work;
 
 
 3944    h->maxDim1 = maxDim1;
 
 3945    h->maxDim2 = maxDim2;
 
 3946    h->currentWorkSize = 0;
 
 3947    h->a = 
malloc1d(maxDim1*maxDim2*
sizeof(double_complex));
 
 3949    h->u = 
malloc1d(maxDim1*maxDim1*
sizeof(double_complex));
 
 3950    h->vt = 
malloc1d(maxDim2*maxDim2*
sizeof(double_complex));
 
 3951    h->inva = 
malloc1d(maxDim1*maxDim2*
sizeof(double_complex));
 
 
 3977    const double_complex* inM,
 
 3980    double_complex* outM
 
 3984    veclib_int i, j, m, n, k, lda, ldu, ldvt, info, ld_inva;
 
 3985    double_complex ss_cmplx;
 
 3986    const double_complex calpha = cmplx(1.0, 0.0); 
const double_complex cbeta = cmplx(0.0, 0.0); 
 
 3989    double_complex wkopt;
 
 3991    m = lda = ldu = dim1;
 
 4001        saf_assert(dim1<=h->maxDim1, 
"dim1 exceeds the maximum length specified");
 
 4002        saf_assert(dim2<=h->maxDim2, 
"dim2 exceeds the maximum length specified");
 
 4007    for(i=0; i<dim1; i++)
 
 4008        for(j=0; j<dim2; j++)
 
 4009            h->a[j*dim1+i] = inM[i*dim2 +j];
 
 4013#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 4014    zgesvd_( 
"A", 
"A", &m, &n, (
veclib_double_complex*)h->a, &lda, h->s, (
veclib_double_complex*)h->u, &ldu, (
veclib_double_complex*)h->vt, &ldvt,
 
 4016#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 4018#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 4019    info = LAPACKE_zgesvd_work(CblasColMajor, 
'A', 
'A', m, n, (
veclib_double_complex*)h->a, lda, h->s, (
veclib_double_complex*)h->u, ldu, (
veclib_double_complex*)h->vt, ldvt,
 
 4023    if(lwork>h->currentWorkSize){
 
 4024        h->currentWorkSize = lwork;
 
 4025        h->work = 
realloc1d(h->work, h->currentWorkSize*
sizeof(double_complex));
 
 4029#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 4030    zgesvd_( 
"A", 
"A", &m, &n, (
veclib_double_complex*)h->a, &lda, h->s, (
veclib_double_complex*)h->u, &ldu, (
veclib_double_complex*)h->vt, &ldvt,
 
 4032#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 4034#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 4035    info = LAPACKE_zgesvd_work(CblasColMajor, 
'A', 
'A', m, n, (
veclib_double_complex*)h->a, lda, h->s, (
veclib_double_complex*)h->u, ldu, (
veclib_double_complex*)h->vt, ldvt,
 
 4040        memset(outM, 0, dim1*dim2*
sizeof(double_complex));
 
 4045        saf_print_warning(
"Could not compute SVD in utility_zpinv(). Output matrices/vectors have been zeroed.");
 
 4050            if(h->s[i] > 1.0e-5)
 
 4054            ss_cmplx = cmplx(ss, 0.0);
 
 4055            cblas_zscal(m, &ss_cmplx, &h->u[i*m], 1);
 
 4058        cblas_zgemm(CblasColMajor, CblasConjTrans, CblasConjTrans, n, m, k, &calpha,
 
 4066                outM[j*m+i] = h->inva[i*n+j];
 
 
 4080typedef struct _utility_schol_data {
 
 
 4091    h->a = 
malloc1d(maxDim*maxDim*
sizeof(
float));
 
 
 4126        saf_assert(dim<=h->maxDim, 
"dim exceeds the maximum length specified");
 
 4131    for(i=0; i<dim; i++)
 
 4132        for(j=0; j<dim; j++)
 
 4133            h->a[j*dim+i] = A[i*dim+j];
 
 4136#ifdef SAF_VECLIB_USE_CLAPACK_INTERFACE 
 4137    info = clapack_spotrf(CblasColMajor, CblasUpper, n, h->a, lda);
 
 4138#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 4139    info = LAPACKE_spotrf_work(CblasColMajor, CblasUpper, n, h->a, lda);
 
 4140#elif defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 4141    spotrf_( 
"U", &n, h->a, &lda, &info );
 
 4146        memset(X, 0, dim*dim*
sizeof(
float));
 
 4152        saf_print_warning(
"Could not compute the Cholesky factorisation in utility_schol(). Output matrices/vectors have been zeroed.");
 
 4157        for(i=0; i<dim; i++)
 
 4158            for(j=0; j<dim; j++)
 
 4159                X[i*dim+j] = j>=i ? h->a[j*dim+i] : 0.0f;
 
 
 4168typedef struct _utility_cchol_data {
 
 
 4179    h->a = 
malloc1d(maxDim*maxDim*
sizeof(float_complex));
 
 
 4198    const float_complex* A,
 
 4214        saf_assert(dim<=h->maxDim, 
"dim exceeds the maximum length specified");
 
 4219    for(i=0; i<dim; i++)
 
 4220        for(j=0; j<dim; j++)
 
 4221            h->a[j*dim+i] = A[i*dim+j];
 
 4224#if defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 4226#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 4228#elif defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 4234        memset(X, 0, dim*dim*
sizeof(float_complex));
 
 4240        saf_print_warning(
"Could not compute the Cholesky factorisation in utility_cchol(). Output matrices/vectors have been zeroed.");
 
 4245        for(i=0; i<dim; i++)
 
 4246            for(j=0; j<dim; j++)
 
 4247                X[i*dim+j] = j>=i ? h->a[j*dim+i] : cmplxf(0.0f, 0.0f);
 
 
 4260typedef struct _utility_sdet_data {
 
 
 4273    h->tmp = 
malloc1d(maxN*maxN*
sizeof(
float));
 
 
 4303        return A[0]*A[3] - A[2]*A[1];
 
 4306        return A[0] * ((A[4]*A[8]) - (A[7]*A[5])) -A[1] * (A[3]
 
 4307           * A[8] - A[6] * A[5]) + A[2] * (A[3] * A[7] - A[6] * A[4]);
 
 4311        A[3] * A[6] * A[9] * A[12] - A[2] * A[7] * A[9] * A[12] -
 
 4312        A[3] * A[5] * A[10] * A[12] + A[1] * A[7] * A[10] * A[12] +
 
 4313        A[2] * A[5] * A[11] * A[12] - A[1] * A[6] * A[11] * A[12] -
 
 4314        A[3] * A[6] * A[8] * A[13] + A[2] * A[7] * A[8] * A[13] +
 
 4315        A[3] * A[4] * A[10] * A[13] - A[0] * A[7] * A[10] * A[13] -
 
 4316        A[2] * A[4] * A[11] * A[13] + A[0] * A[6] * A[11] * A[13] +
 
 4317        A[3] * A[5] * A[8] * A[14] - A[1] * A[7] * A[8] * A[14] -
 
 4318        A[3] * A[4] * A[9] * A[14] + A[0] * A[7] * A[9] * A[14] +
 
 4319        A[1] * A[4] * A[11] * A[14] - A[0] * A[5] * A[11] * A[14] -
 
 4320        A[2] * A[5] * A[8] * A[15] + A[1] * A[6] * A[8] * A[15] +
 
 4321        A[2] * A[4] * A[9] * A[15] - A[0] * A[6] * A[9] * A[15] -
 
 4322        A[1] * A[4] * A[10] * A[15] + A[0] * A[5] * A[10] * A[15];
 
 4332        saf_assert(N<=h->maxN, 
"N exceeds the maximum length specified");
 
 4339            h->tmp[j*N+i] = A[i*N+j];
 
 4341#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 4343#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 4344    INFO = clapack_sgetrf(CblasColMajor, N, N, h->tmp, N, h->IPIV);
 
 4345#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 4346    INFO = LAPACKE_sgetrf_work(CblasColMajor, N, N, h->tmp, N, h->IPIV);
 
 4354        saf_print_warning(
"Unable to compute determinant of input matrix. The function utility_sdet() returned 0. ");
 
 4359        for( i = 0; i < N; i++ ) {
 
 4361            if(h->IPIV[i] != i+1)
 
 
 4374typedef struct _utility_ddet_data {
 
 4375    int currentWorkSize;
 
 4378    double *tmp, *TAU, *WORK;
 
 
 4387    h->currentWorkSize = 0;
 
 4389    h->tmp = 
malloc1d(maxN*maxN*
sizeof(
double));
 
 4390    h->TAU = 
malloc1d(maxN*
sizeof(
double));
 
 
 4423        return A[0]*A[3] - A[2]*A[1];
 
 4426        return A[0] * ((A[4]*A[8]) - (A[7]*A[5])) -A[1] * (A[3]
 
 4427           * A[8] - A[6] * A[5]) + A[2] * (A[3] * A[7] - A[6] * A[4]);
 
 4431        A[3] * A[6] * A[9] * A[12] - A[2] * A[7] * A[9] * A[12] -
 
 4432        A[3] * A[5] * A[10] * A[12] + A[1] * A[7] * A[10] * A[12] +
 
 4433        A[2] * A[5] * A[11] * A[12] - A[1] * A[6] * A[11] * A[12] -
 
 4434        A[3] * A[6] * A[8] * A[13] + A[2] * A[7] * A[8] * A[13] +
 
 4435        A[3] * A[4] * A[10] * A[13] - A[0] * A[7] * A[10] * A[13] -
 
 4436        A[2] * A[4] * A[11] * A[13] + A[0] * A[6] * A[11] * A[13] +
 
 4437        A[3] * A[5] * A[8] * A[14] - A[1] * A[7] * A[8] * A[14] -
 
 4438        A[3] * A[4] * A[9] * A[14] + A[0] * A[7] * A[9] * A[14] +
 
 4439        A[1] * A[4] * A[11] * A[14] - A[0] * A[5] * A[11] * A[14] -
 
 4440        A[2] * A[5] * A[8] * A[15] + A[1] * A[6] * A[8] * A[15] +
 
 4441        A[2] * A[4] * A[9] * A[15] - A[0] * A[6] * A[9] * A[15] -
 
 4442        A[1] * A[4] * A[10] * A[15] + A[0] * A[5] * A[10] * A[15];
 
 4452        saf_assert(N<=h->maxN, 
"N exceeds the maximum length specified");
 
 4459            h->tmp[j*N+i] = A[i*N+j];
 
 4463#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 4464    dgeqrf_(&N, &N, h->tmp, &N, h->TAU, &lwork2, &LWORK, &INFO);
 
 4465#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 4467#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 4468    INFO = LAPACKE_dgeqrf_work(CblasColMajor, N, N, h->tmp, N, h->TAU, &lwork2, LWORK);
 
 4471    if(lwork3>h->currentWorkSize){
 
 4472        h->currentWorkSize = lwork3;
 
 4473        h->WORK = 
realloc1d(h->WORK, h->currentWorkSize*
sizeof(
double));
 
 4477#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 4478    dgeqrf_(&N, &N, h->tmp, &N, h->TAU, h->WORK, &lwork3, &INFO);
 
 4479#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 4481#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 4482    INFO = LAPACKE_dgeqrf_work(CblasColMajor, N, N, h->tmp, N, h->TAU, h->WORK, lwork3);
 
 4490        saf_print_warning(
"Unable to compute determinant of input matrix. The function utility_ddet() returned 0. ");
 
 
 4515typedef struct _utility_sinv_data {
 
 
 4528    h->tmp = 
malloc1d(maxN*maxN*
sizeof(
float));
 
 4529    h->WORK = 
malloc1d(maxN*maxN*
sizeof(
float));
 
 
 4569        saf_assert(N<=h->maxN, 
"N exceeds the maximum length specified");
 
 4576            h->tmp[j*N+i] = A[i*N+j];
 
 4578#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 4581#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 4582    INFO = clapack_sgetrf(CblasColMajor, N_, N_, h->tmp, N_, h->IPIV);
 
 4583    INFO = clapack_sgetri(CblasColMajor, N_, h->tmp, N_, h->IPIV);
 
 4584#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 4585    INFO = LAPACKE_sgetrf_work(CblasColMajor, N_, N_, h->tmp, N_, h->IPIV);
 
 4586    INFO = LAPACKE_sgetri_work(CblasColMajor, N_, h->tmp, N_, h->IPIV, h->WORK, LWORK);
 
 4590        memset(B, 0, N*N*
sizeof(
float));
 
 4595        saf_print_warning(
"Unable to compute the inverse of input matrix. The function utility_sinv() returned a matrix of zeros. ");
 
 4602                B[j*N+i] = h->tmp[i*N+j];
 
 
 4611typedef struct _utility_dinv_data {
 
 
 4624    h->tmp = 
malloc1d(maxN*maxN*
sizeof(
double));
 
 4625    h->WORK = 
malloc1d(maxN*maxN*
sizeof(
double));
 
 
 4665        saf_assert(N<=h->maxN, 
"N exceeds the maximum length specified");
 
 4672            h->tmp[j*N+i] = A[i*N+j];
 
 4674#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 4677#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 4678    INFO = clapack_dgetrf(CblasColMajor, N_, N_, h->tmp, N_, h->IPIV);
 
 4679    INFO = clapack_dgetri(CblasColMajor, N_, h->tmp, N_, h->IPIV);
 
 4680#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 4681    INFO = LAPACKE_dgetrf_work(CblasColMajor, N_, N_, h->tmp, N_, h->IPIV);
 
 4682    INFO = LAPACKE_dgetri_work(CblasColMajor, N_, h->tmp, N_, h->IPIV, h->WORK, LWORK);
 
 4686        memset(B, 0, N*N*
sizeof(
double));
 
 4691        saf_print_warning(
"Unable to compute the inverse of input matrix. The function utility_dinv() returned a matrix of zeros. ");
 
 4698                B[j*N+i] = h->tmp[i*N+j];
 
 
 4707typedef struct _utility_cinv_data {
 
 4710    float_complex *WORK, *tmp;
 
 
 4720    h->tmp = 
malloc1d(maxN*maxN*
sizeof(float_complex));
 
 4721    h->WORK = 
malloc1d(maxN*maxN*
sizeof(float_complex));
 
 
 4761        saf_assert(N<=h->maxN, 
"N exceeds the maximum length specified");
 
 4768            h->tmp[j*N+i] = A[i*N+j];
 
 4770#if defined(SAF_VECLIB_USE_LAPACK_FORTRAN_INTERFACE) 
 4773#elif defined(SAF_VECLIB_USE_CLAPACK_INTERFACE) 
 4776#elif defined(SAF_VECLIB_USE_LAPACKE_INTERFACE) 
 4782        memset(B, 0, N*N*
sizeof(float_complex));
 
 4787        saf_print_warning(
"Unable to compute the inverse of input matrix. The function utility_cinv() returned a matrix of zeros. ");
 
 4794                B[j*N+i] = h->tmp[i*N+j];
 
 
void utility_dimaxv(const double *a, const int len, int *index)
Double-precision, index of maximum absolute value in a vector, i.e.
#define saf_print_error(message)
Macro to print a error message along with the filename and line number.
void utility_spinv_create(void **const phWork, int maxDim1, int maxDim2)
(Optional) Pre-allocate the working struct used by utility_spinv()
void utility_dinv(void *const hWork, double *A, double *B, const int N)
Matrix inversion: double precision, i.e.
double utility_ddet(void *const hWork, double *A, int N)
Determinant of a Matrix, double precision, i,e.
void utility_dinv_create(void **const phWork, int maxN)
(Optional) Pre-allocate the working struct used by utility_dinv()
void utility_zvvsub(const double_complex *a, const double_complex *b, const int len, double_complex *c)
Double-precision, complex, vector-vector subtraction, i.e.
void utility_ceigmp(void *const hWork, const float_complex *A, const float_complex *B, const int dim, float_complex *VL, float_complex *VR, float_complex *D)
Computes eigenvalues of a matrix pair using the QZ method, single precision complex,...
#define saf_assert(x, message)
Macro to make an assertion, along with a string explaining its purpose.
void utility_cpinv_create(void **const phWork, int maxDim1, int maxDim2)
(Optional) Pre-allocate the working struct used by utility_cpinv()
void utility_zsv2cv_inds(const double_complex *sv, const int *inds, const int len, double_complex *cv)
Double-precision complex, sparse-vector to compressed vector given known indices.
void utility_cglslv_destroy(void **const phWork)
De-allocate the working struct used by utility_cglslv()
void utility_cglslv(void *const hWork, const float_complex *A, const int dim, float_complex *B, int nCol, float_complex *X)
General linear solver: single precision complex, i.e.
void utility_ddet_destroy(void **const phWork)
De-allocate the working struct used by utility_ddet()
void utility_zeig_destroy(void **const phWork)
De-allocate the working struct used by utility_zeig()
void utility_cvvmul(const float_complex *a, const float_complex *b, const int len, float_complex *c)
Single-precision, complex, element-wise vector-vector multiplication i.e.
void utility_svvmul(const float *a, const float *b, const int len, float *c)
Single-precision, element-wise vector-vector multiplication i.e.
void utility_ceig_destroy(void **const phWork)
De-allocate the working struct used by utility_ceig()
void utility_cinv(void *const hWork, float_complex *A, float_complex *B, const int N)
Matrix inversion: double precision complex, i.e.
void utility_dvsmul(double *a, const double *s, const int len, double *c)
Double-precision, multiplies each element in vector 'a' with a scalar 's', i.e.
void utility_csv2cv_inds(const float_complex *sv, const int *inds, const int len, float_complex *cv)
Single-precision complex, sparse-vector to compressed vector given known indices.
void utility_svmod(const float *a, const float *b, const int len, float *c)
Single-precision, modulus of vector elements, i.e.
void utility_ziminv(const double_complex *a, const int len, int *index)
Double-precision, complex, index of maximum absolute value in a vector, i.e.
void utility_dvvsub(const double *a, const double *b, const int len, double *c)
Double-precision, vector-vector subtraction, i.e.
void utility_dsv2cv_inds(const double *sv, const int *inds, const int len, double *cv)
Double-precision, sparse-vector to compressed vector given known indices i.e.
#define SAF_FALSE
Boolean false.
void utility_csvd_destroy(void **const phWork)
De-allocate the working struct used by utility_csvd()
#define SAF_MAX(a, b)
Returns the maximum of the two values.
void utility_ceigmp_create(void **const phWork, int maxDim)
(Optional) Pre-allocate the working struct used by utility_ceigmp()
void utility_dglslv(void *const hWork, const double *A, const int dim, double *B, int nCol, double *X)
General linear solver: double precision, i.e.
void utility_zvsmul(double_complex *a, const double_complex *s, const int len, double_complex *c)
Double-precision, complex, multiplies each element in vector 'a' with a scalar 's',...
void utility_dglslv_destroy(void **const phWork)
De-allocate the working struct used by utility_dglslv()
void utility_schol_destroy(void **const phWork)
De-allocate the working struct used by utility_schol()
void utility_zeig_create(void **const phWork, int maxDim)
(Optional) Pre-allocate the working struct used by utility_zeig()
void utility_svsadd(float *a, const float *s, const int len, float *c)
Single-precision, adds each element in vector 'a' with a scalar 's', i.e.
void utility_cseig_destroy(void **const phWork)
De-allocate the working struct used by utility_cseig()
void utility_ciminv(const float_complex *a, const int len, int *index)
Single-precision, complex, index of maximum absolute value in a vector, i.e.
void utility_dvvadd(const double *a, const double *b, const int len, double *c)
Double-precision, vector-vector addition, i.e.
void utility_svssub(float *a, const float *s, const int len, float *c)
Single-precision, subtracts each element in vector 'a' with a scalar 's', i.e.
void utility_svvsub(const float *a, const float *b, const int len, float *c)
Single-precision, vector-vector subtraction, i.e.
void utility_ssvd_create(void **const phWork, int maxDim1, int maxDim2)
(Optional) Pre-allocate the working struct used by utility_ssvd()
void utility_sglslvt(void *const hWork, const float *A, const int dim, float *B, int nCol, float *X)
General linear solver (the other way): single precision, i.e.
void utility_ssvd(void *const hWork, const float *A, const int dim1, const int dim2, float *U, float *S, float *V, float *sing)
Singular value decomposition: single precision, i.e.
void utility_zimaxv(const double_complex *a, const int len, int *index)
Double-precision, complex, index of maximum absolute value in a vector, i.e.
void utility_zeigmp(void *const hWork, const double_complex *A, const double_complex *B, const int dim, double_complex *VL, double_complex *VR, double_complex *D)
Computes eigenvalues of a matrix pair using the QZ method, double precision complex,...
void utility_cinv_destroy(void **const phWork)
De-allocate the working struct used by utility_cinv()
void utility_ceig(void *const hWork, const float_complex *A, const int dim, float_complex *VL, float_complex *VR, float_complex *D, float_complex *eig)
Eigenvalue decomposition of a NON-SYMMETRIC matrix: single precision complex, i.e.
void utility_cvvadd(const float_complex *a, const float_complex *b, const int len, float_complex *c)
Single-precision, complex, vector-vector addition, i.e.
void utility_sinv_create(void **const phWork, int maxN)
(Optional) Pre-allocate the working struct used by utility_sinv()
void utility_zglslv(void *const hWork, const double_complex *A, const int dim, double_complex *B, int nCol, double_complex *X)
General linear solver: double precision complex, i.e.
void utility_sglslv(void *const hWork, const float *A, const int dim, float *B, int nCol, float *X)
General linear solver: single precision, i.e.
CONJ_FLAG
Whether a vector should be conjugated or not (e.g.
void utility_svvcopy(const float *a, const int len, float *c)
Single-precision, vector-vector copy, i.e.
void utility_cimaxv(const float_complex *a, const int len, int *index)
Single-precision, complex, index of maximum absolute value in a vector, i.e.
#define SAF_TRUE
Boolean true.
void utility_spinv(void *const hWork, const float *inM, const int dim1, const int dim2, float *outM)
General matrix pseudo-inverse (the svd way): single precision, i.e.
void utility_dpinv_create(void **const phWork, int maxDim1, int maxDim2)
(Optional) Pre-allocate the working struct used by utility_dpinv()
void utility_cpinv(void *const hWork, const float_complex *inM, const int dim1, const int dim2, float_complex *outM)
General matrix pseudo-inverse (the svd way): single precision complex, i.e.
void utility_cslslv_create(void **const phWork, int maxDim, int maxNCol)
(Optional) Pre-allocate the working struct used by utility_cslslv()
void utility_sseig_create(void **const phWork, int maxDim)
(Optional) Pre-allocate the working struct used by utility_sseig()
void utility_sinv(void *const hWork, float *A, float *B, const int N)
Matrix inversion: single precision, i.e.
void utility_zglslv_destroy(void **const phWork)
De-allocate the working struct used by utility_zglslv()
void utility_cpinv_destroy(void **const phWork)
De-allocate the working struct used by utility_cpinv()
void utility_sglslvt_destroy(void **const phWork)
De-allocate the working struct used by utility_sglslvt()
void utility_dpinv_destroy(void **const phWork)
De-allocate the working struct used by utility_dpinv()
void utility_cseig(void *const hWork, const float_complex *A, const int dim, int sortDecFLAG, float_complex *V, float_complex *D, float *eig)
Eigenvalue decomposition of a SYMMETRIC/HERMITION matrix: single precision complex,...
void utility_csvd(void *const hWork, const float_complex *A, const int dim1, const int dim2, float_complex *U, float_complex *S, float_complex *V, float *sing)
Singular value decomposition: single precision complex, i.e.
void utility_dpinv(void *const hWork, const double *inM, const int dim1, const int dim2, double *outM)
General matrix pseudo-inverse (the svd way): double precision, i.e.
void utility_cseig_create(void **const phWork, int maxDim)
(Optional) Pre-allocate the working struct used by utility_cseig()
void utility_diminv(const double *a, const int len, int *index)
Double-precision, index of minimum absolute value in a vector, i.e.
void utility_sslslv_destroy(void **const phWork)
De-allocate the working struct used by utility_sslslv()
void utility_schol(void *const hWork, const float *A, const int dim, float *X)
Cholesky factorisation of a symmetric matrix positive-definate matrix: single precision,...
#define SAF_MIN(a, b)
Returns the minimum of the two values.
void utility_zeig(void *const hWork, const double_complex *A, const int dim, double_complex *VL, double_complex *VR, double_complex *D, double_complex *eig)
Eigenvalue decomposition of a NON-SYMMETRIC matrix: double precision complex, i.e.
void utility_cvabs(const float_complex *a, const int len, float *c)
Single-precision, complex, absolute values of vector elements, i.e.
void utility_cvvdot(const float_complex *a, const float_complex *b, const int len, CONJ_FLAG flag, float_complex *c)
Single-precision, complex, vector-vector dot product, i.e.
void utility_sglslv_create(void **const phWork, int maxDim, int maxNCol)
(Optional) Pre-allocate the working struct used by utility_sglslv()
void utility_cinv_create(void **const phWork, int maxN)
(Optional) Pre-allocate the working struct used by utility_cinv()
void utility_cvvsub(const float_complex *a, const float_complex *b, const int len, float_complex *c)
Single-precision, complex, vector-vector subtraction, i.e.
void utility_zpinv(void *const hWork, const double_complex *inM, const int dim1, const int dim2, double_complex *outM)
General matrix pseudo-inverse (the svd way): double precision complex, i.e.
void utility_zpinv_create(void **const phWork, int maxDim1, int maxDim2)
(Optional) Pre-allocate the working struct used by utility_zpinv()
void utility_sseig(void *const hWork, const float *A, const int dim, int sortDecFLAG, float *V, float *D, float *eig)
Eigenvalue decomposition of a SYMMETRIC matrix: single precision, i.e.
void utility_dinv_destroy(void **const phWork)
De-allocate the working struct used by utility_dinv()
void utility_cchol(void *const hWork, const float_complex *A, const int dim, float_complex *X)
Cholesky factorisation of a hermitian positive-definate matrix: single precision complex,...
#define saf_print_warning(message)
Macro to print a warning message along with the filename and line number.
void utility_cchol_destroy(void **const phWork)
De-allocate the working struct used by utility_cchol()
void utility_zvvcopy(const double_complex *a, const int len, double_complex *c)
double-precision, complex, vector-vector copy, i.e.
float utility_sdet(void *const hWork, float *A, int N)
Determinant of a Matrix, single precision, i,e.
void utility_zeigmp_destroy(void **const phWork)
De-allocate the working struct used by utility_zeigmp()
void utility_schol_create(void **const phWork, int maxDim)
(Optional) Pre-allocate the working struct used by utility_schol()
void utility_sslslv_create(void **const phWork, int maxDim, int maxNCol)
(Optional) Pre-allocate the working struct used by utility_sslslv()
void utility_zvvadd(const double_complex *a, const double_complex *b, const int len, double_complex *c)
Double-precision, complex, vector-vector addition, i.e.
void utility_ceigmp_destroy(void **const phWork)
De-allocate the working struct used by utility_ceigmp()
void utility_csvd_create(void **const phWork, int maxDim1, int maxDim2)
(Optional) Pre-allocate the working struct used by utility_csvd()
void utility_cvvcopy(const float_complex *a, const int len, float_complex *c)
Single-precision, complex, vector-vector copy, i.e.
void utility_svvadd(const float *a, const float *b, const int len, float *c)
Single-precision, vector-vector addition, i.e.
void utility_svrecip(const float *a, const int len, float *c)
Single-precision, vector-reciprocal/inversion, i.e.
void utility_sglslvt_create(void **const phWork, int maxDim, int maxNCol)
(Optional) Pre-allocate the working struct used by utility_sglslvt()
void utility_spinv_destroy(void **const phWork)
De-allocate the working struct used by utility_spinv()
void utility_sslslv(void *const hWork, const float *A, const int dim, float *B, int nCol, float *X)
Linear solver for SYMMETRIC positive-definate 'A': single precision, i.e.
void utility_ddet_create(void **const phWork, int maxN)
(Optional) Pre-allocate the working struct used by utility_ddet()
void utility_sdet_create(void **const phWork, int maxN)
(Optional) Pre-allocate the working struct used by utility_sdet()
void utility_cvsmul(float_complex *a, const float_complex *s, const int len, float_complex *c)
Single-precision, complex, multiplies each element in vector 'a' with a scalar 's',...
void utility_cglslv_create(void **const phWork, int maxDim, int maxNCol)
(Optional) Pre-allocate the working struct used by utility_cglslv()
void utility_ceig_create(void **const phWork, int maxDim)
(Optional) Pre-allocate the working struct used by utility_ceig()
void utility_dglslv_create(void **const phWork, int maxDim, int maxNCol)
(Optional) Pre-allocate the working struct used by utility_dglslv()
void utility_svvdot(const float *a, const float *b, const int len, float *c)
Single-precision, vector-vector dot product, i.e.
void utility_svsdiv(const float *a, const float *s, const int len, float *c)
Single-precision, divides each element in vector 'a' with a scalar 's', i.e.
void utility_cslslv_destroy(void **const phWork)
De-allocate the working struct used by utility_cslslv()
void utility_zglslv_create(void **const phWork, int maxDim, int maxNCol)
(Optional) Pre-allocate the working struct used by utility_zglslv()
void utility_cvconj(const float_complex *a, const int len, float_complex *c)
Single-precision, complex, vector-conjugate, i.e.
void utility_dvvcopy(const double *a, const int len, double *c)
double-precision, vector-vector copy, i.e.
void utility_sseig_destroy(void **const phWork)
De-allocate the working struct used by utility_sseig()
void utility_svabs(const float *a, const int len, float *c)
Single-precision, absolute values of vector elements, i.e.
void utility_svsmul(float *a, const float *s, const int len, float *c)
Single-precision, multiplies each element in vector 'a' with a scalar 's', i.e.
void utility_zvconj(const double_complex *a, const int len, double_complex *c)
Double-precision, complex, vector-conjugate, i.e.
void utility_sinv_destroy(void **const phWork)
De-allocate the working struct used by utility_sinv()
void utility_cchol_create(void **const phWork, int maxDim)
(Optional) Pre-allocate the working struct used by utility_cchol()
void utility_sdet_destroy(void **const phWork)
De-allocate the working struct used by utility_sdet()
void utility_siminv(const float *a, const int len, int *index)
Single-precision, index of minimum absolute value in a vector, i.e.
void utility_zeigmp_create(void **const phWork, int maxDim)
(Optional) Pre-allocate the working struct used by utility_zeigmp()
void utility_ssv2cv_inds(const float *sv, const int *inds, const int len, float *cv)
Single-precision, sparse-vector to compressed vector given known indices i.e.
void utility_sglslv_destroy(void **const phWork)
De-allocate the working struct used by utility_sglslv()
void utility_simaxv(const float *a, const int len, int *index)
Single-precision, index of maximum absolute value in a vector, i.e.
void utility_ssvd_destroy(void **const phWork)
De-allocate the working struct used by utility_ssvd()
void utility_zpinv_destroy(void **const phWork)
De-allocate the working struct used by utility_zpinv()
void utility_cslslv(void *const hWork, const float_complex *A, const int dim, float_complex *B, int nCol, float_complex *X)
Linear solver for HERMITIAN positive-definate 'A': single precision complex, i.e.
@ NO_CONJ
Do not take the conjugate.
@ CONJ
Take the conjugate.
void * malloc1d(size_t dim1_data_size)
1-D malloc (same as malloc, but with error checking)
void * realloc1d(void *ptr, size_t dim1_data_size)
1-D realloc (same as realloc, but with error checking)
Include header for SAF externals.
Main header for the utilities module (SAF_UTILITIES_MODULE)
double veclib_double
real: 8-bytes
MKL_INT veclib_int
integer: 4-bytes
MKL_Complex16 veclib_double_complex
complex: 16-bytes
MKL_Complex8 veclib_float_complex
complex: 8-bytes
#define SAF_INTEL_MKL_VML_MODE
Taken from the Intel MKL VML developers reference documentation: The flag(s) that can be passed to In...
float veclib_float
real: 4-bytes
Data structure for utility_cchol()
Data structure for utility_ceig()
Data structure for utility_ceigmp()
Data structure for utility_cglslv()
Data structure for utility_cinv()
Data structure for utility_cpinv()
Data structure for utility_cseig()
Data structure for utility_cslslv()
Data structure for utility_csvd()
Data structure for utility_ddet()
Data structure for utility_dglslv()
Data structure for utility_dinv()
Data structure for utility_dpinv()
Data structure for utility_schol()
Data structure for utility_sdet()
Data structure for utility_sglslv()
Data structure for utility_sglslvt()
Data structure for utility_sinv()
Data structure for utility_spinv()
Data structure for utility_sseig()
Data structure for utility_sslslv()
Data structure for utility_ssvd()
Data structure for utility_zeig()
Data structure for utility_zeigmp()
Data structure for utility_zglslv()
Data structure for utility_zpinv()