285 int i, j, nXcols, nYcols;
290 memset(h->lambda, 0, nYcols * nXcols *
sizeof(
float));
291 for(i = 0; i<
SAF_MIN(nXcols,nYcols); i++)
292 h->lambda[i*nXcols + i] = 1.0f;
295 utility_ssvd(h->hSVD, Cy, nYcols, nYcols, h->U_Cy, h->S_Cy, NULL, NULL);
296 for(i=0; i< nYcols; i++)
297 h->S_Cy[i*nYcols+i] = sqrtf(
SAF_MAX(h->S_Cy[i*nYcols+i], 2.23e-20f));
298 cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, nYcols, nYcols, nYcols, 1.0f,
300 h->S_Cy, nYcols, 0.0f,
304 utility_ssvd(h->hSVD, Cx, nXcols, nXcols, h->U_Cx, h->S_Cx, NULL, h->s_Cx);
305 for(i=0; i< nXcols; i++){
306 h->S_Cx[i*nXcols+i] = sqrtf(
SAF_MAX(h->S_Cx[i*nXcols+i], 2.23e-20f));
307 h->s_Cx[i] = sqrtf(
SAF_MAX(h->s_Cx[i], 2.23e-20f));
309 cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, nXcols, nXcols, nXcols, 1.0f,
311 h->S_Cx, nXcols, 0.0f,
318 limit = h->s_Cx[ind] * reg + 2.23e-13f;
319 for(i=0; i < nXcols; i++)
320 h->S_Cx[i*nXcols+i] = 1.0f /
SAF_MAX(h->S_Cx[i*nXcols+i], limit);
323 cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, nXcols, nXcols, nXcols, 1.0f,
325 h->U_Cx, nXcols, 0.0f,
326 h->Kx_reg_inverse, nXcols);
329 cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, nXcols, nYcols, nXcols, 1.0f,
333 cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, nYcols, nYcols, nXcols, 1.0f,
335 h->Cx_QH, nYcols, 0.0f,
338 for(i=0; i< nYcols; i++)
339 maxVal = h->G_hat[i*nYcols+i] > maxVal ? h->G_hat[i*nYcols+i] : maxVal;
340 limit = maxVal * 0.001f + 2.23e-13f;
341 for(i=0; i < nYcols; i++)
342 for(j=0; j < nYcols; j++)
343 h->G_hat[i*nYcols+j] = i==j ? sqrtf(
SAF_MAX(Cy[i*nYcols+j], 2.23e-13f) /
SAF_MAX(h->G_hat[i*nYcols+j], limit)) : 0.0f;
346 cblas_sgemm(CblasRowMajor, CblasTrans, CblasNoTrans, nYcols, nYcols, nYcols, 1.0f,
349 h->GhatH_Ky, nYcols);
350 cblas_sgemm(CblasRowMajor, CblasTrans, CblasNoTrans, nXcols, nYcols, nYcols, 1.0f,
352 h->GhatH_Ky, nYcols, 0.0f,
353 h->QH_GhatH_Ky, nYcols);
354 cblas_sgemm(CblasRowMajor, CblasTrans, CblasNoTrans, nXcols, nYcols, nXcols, 1.0f,
356 h->QH_GhatH_Ky, nYcols, 0.0f,
357 h->KxH_QH_GhatH_Ky, nYcols);
358 utility_ssvd(h->hSVD, h->KxH_QH_GhatH_Ky, nXcols, nYcols, h->U, NULL, h->V, NULL);
359 cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, nYcols, nXcols, nXcols, 1.0f,
362 h->lambda_UH, nXcols);
363 cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, nYcols, nXcols, nYcols, 1.0f,
365 h->lambda_UH, nXcols, 0.0f,
369 cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, nYcols, nXcols, nXcols, 1.0f,
371 h->Kx_reg_inverse, nXcols, 0.0f,
372 h->P_Kxreginverse, nXcols);
373 cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, nYcols, nXcols, nYcols, 1.0f,
375 h->P_Kxreginverse, nXcols, 0.0f,
379 cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, nXcols, nYcols, nXcols, 1.0f,
383 cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, nYcols, nYcols, nXcols, 1.0f,
385 h->Cx_MH, nYcols, 0.0f,
386 h->Cy_tilde, nYcols);
388 for(i=0; i < nYcols*nYcols; i++)
389 Cr[i] = Cy[i] - h->Cy_tilde[i];
393 for(i=0; i < nYcols; i++)
394 for(j=0; j < nYcols; j++)
395 h->G_hat[i*nYcols+j] = i==j ? sqrtf(
SAF_MAX(Cy[i*nYcols+j], 2.23e-20f) / (h->Cy_tilde[i*nYcols+j]+2.23e-7f)) : 0.0f;
396 cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, nYcols, nXcols, nYcols, 1.0f,
400 memcpy(M, h->G_M, nYcols*nXcols*
sizeof(
float));
402 memset(Cr, 0, nYcols*nYcols*
sizeof(
float));
419 int i, j, nXcols, nYcols;
420 const float_complex calpha = cmplxf(1.0f, 0.0f);
const float_complex cbeta = cmplxf(0.0f, 0.0f);
425 memset(h->lambda, 0, nYcols * nXcols *
sizeof(float_complex));
426 for(i = 0; i<
SAF_MIN(nXcols,nYcols); i++)
427 h->lambda[i*nXcols + i] = cmplxf(1.0f, 0.0f);
430 utility_csvd(h->hSVD, Cy, nYcols, nYcols, h->U_Cy, h->S_Cy, NULL, NULL);
431 for(i=0; i< nYcols; i++)
432 h->S_Cy[i*nYcols+i] = cmplxf(sqrtf(
SAF_MAX(crealf(h->S_Cy[i*nYcols+i]), 2.23e-20f)), 0.0f);
433 cblas_cgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, nYcols, nYcols, nYcols, &calpha,
435 h->S_Cy, nYcols, &cbeta,
439 utility_csvd(h->hSVD, Cx, nXcols, nXcols, h->U_Cx, h->S_Cx, NULL, h->s_Cx);
440 for(i=0; i< nXcols; i++){
441 h->s_Cx[i] = sqrtf(
SAF_MAX(h->s_Cx[i], 2.23e-13f));
442 h->S_Cx[i*nXcols+i] = cmplxf(h->s_Cx[i], 0.0f);
444 cblas_cgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, nXcols, nXcols, nXcols, &calpha,
446 h->S_Cx, nXcols, &cbeta,
454 limit = h->s_Cx[ind] * reg + 2.23e-13f;
455 for(i=0; i < nXcols; i++)
456 h->S_Cx[i*nXcols+i] = cmplxf(1.0f /
SAF_MAX(h->s_Cx[i], limit), 0.0f);
459 cblas_cgemm(CblasRowMajor, CblasNoTrans, CblasConjTrans, nXcols, nXcols, nXcols, &calpha,
461 h->U_Cx, nXcols, &cbeta,
462 h->Kx_reg_inverse, nXcols);
465 cblas_cgemm(CblasRowMajor, CblasNoTrans, CblasConjTrans, nXcols, nYcols, nXcols, &calpha,
469 cblas_cgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, nYcols, nYcols, nXcols, &calpha,
471 h->Cx_QH, nYcols, &cbeta,
476 for(i=0; i< nYcols; i++)
477 maxVal = cabsf(h->G_hat[i*nYcols+i]) > maxVal ? cabsf(h->G_hat[i*nYcols+i]) : maxVal;
478 limit = maxVal * 0.001f + 2.23e-13f;
480 cblas_scopy(nYcols, (
float*)h->G_hat, 2*(nYcols+1), h->G_hat_diag, 1);
481 memset(h->G_hat, 0, nYcols*nYcols*
sizeof(float_complex));
482 for(i=0; i<nYcols; i++)
483 h->G_hat_diag[i] = sqrtf(((
float*)Cy)[2*(i*nYcols+i)]/
SAF_MAX(h->G_hat_diag[i], limit));
484 cblas_scopy(nYcols, (
float*)h->G_hat_diag, 1, (
float*)h->G_hat, 2*(nYcols+1));
486 for(i=0; i < nYcols; i++)
487 for(j=0; j < nYcols; j++)
488 h->G_hat[i*nYcols+j] = i==j ? cmplxf(crealf(csqrtf( ccdivf(Cy[i*nYcols+j], cmplxf(
SAF_MAX(cabsf(h->G_hat[i*nYcols+j]), limit), 0.0f)))), 0.0f) : cmplxf(0.0f, 0.0f);
492 cblas_cgemm(CblasRowMajor, CblasConjTrans, CblasNoTrans, nYcols, nYcols, nYcols, &calpha,
494 h->Ky, nYcols, &cbeta,
495 h->GhatH_Ky, nYcols);
496 cblas_cgemm(CblasRowMajor, CblasConjTrans, CblasNoTrans, nXcols, nYcols, nYcols, &calpha,
498 h->GhatH_Ky, nYcols, &cbeta,
499 h->QH_GhatH_Ky, nYcols);
500 cblas_cgemm(CblasRowMajor, CblasConjTrans, CblasNoTrans, nXcols, nYcols, nXcols, &calpha,
502 h->QH_GhatH_Ky, nYcols, &cbeta,
503 h->KxH_QH_GhatH_Ky, nYcols);
504 utility_csvd(h->hSVD, h->KxH_QH_GhatH_Ky, nXcols, nYcols, h->U, NULL, h->V, NULL);
505 cblas_cgemm(CblasRowMajor, CblasNoTrans, CblasConjTrans, nYcols, nXcols, nXcols, &calpha,
507 h->U, nXcols, &cbeta,
508 h->lambda_UH, nXcols);
509 cblas_cgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, nYcols, nXcols, nYcols, &calpha,
511 h->lambda_UH, nXcols, &cbeta,
515 cblas_cgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, nYcols, nXcols, nXcols, &calpha,
517 h->Kx_reg_inverse, nXcols, &cbeta,
518 h->P_Kxreginverse, nXcols);
519 cblas_cgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, nYcols, nXcols, nYcols, &calpha,
521 h->P_Kxreginverse, nXcols, &cbeta,
525 cblas_cgemm(CblasRowMajor, CblasNoTrans, CblasConjTrans, nXcols, nYcols, nXcols, &calpha,
529 cblas_cgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, nYcols, nYcols, nXcols, &calpha,
531 h->Cx_MH, nYcols, &cbeta,
532 h->Cy_tilde, nYcols);
535 cblas_sscal(nYcols*nYcols, 0.0f, ((
float*)Cr)+1, 2);
536 cblas_scopy(nYcols*nYcols, (
float*)Cy, 2, (
float*)Cr, 2);
537 cblas_saxpy(nYcols*nYcols, -1.0f, (
float*)h->Cy_tilde, 2, (
float*)Cr, 2);
539 for(i=0; i < nYcols*nYcols; i++){
540 h->Cr_cmplx[i] = ccsubf(Cy[i], h->Cy_tilde[i]);
541 Cr[i] = cmplxf(crealf(h->Cr_cmplx[i]), 0.0f);
549 for(i=0; i < nYcols; i++)
550 for(j=0; j < nYcols; j++)
551 h->G_hat[i*nYcols+j] = i==j ? csqrtf(ccdivf(Cy[i*nYcols+j], craddf(h->Cy_tilde[i*nYcols+j], 2.23e-13f))): cmplxf(0.0f, 0.0f);
557 cblas_cgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, nYcols, nXcols, nYcols, &calpha,
561 memcpy(M, h->G_M, nYcols*nXcols*
sizeof(float_complex));
563 memset(Cr, 0, nYcols*nYcols*
sizeof(float_complex));