R3BROOT
R3B analysis software
Loading...
Searching...
No Matches
LSQR.cxx
Go to the documentation of this file.
1/*
2* lsqr.c
3* This is a C version of LSQR derived by James W. Howse <jhowse@lanl.gov>
4* from the Fortran 77 implementation of C. C. Paige and M. A. Saunders.
5*
6* This file defines functions for data type allocation and deallocation,
7* lsqr itself (the main algorithm),
8* and functions that scale, copy, and compute the Euclidean norm of a vector.
9*
10*
11* The following history comments are maintained by Michael Saunders.
12*
13* 08 Sep 1999: First version of lsqr.c (this file) provided by
14* James W. Howse <jhowse@lanl.gov>.
15*
16* 16 Aug 2005: Forrester H Cole <fcole@Princeton.edu> reports "too many"
17"* iterations" when a good solution x0 is supplied in
18* input->sol_vec. Note that the f77 and Matlab versions of LSQR
19* intentionally don't allow x0 to be input. They require
20* the user to define r0 = b - A*x0 and solve A dx = r0,
21* then add x = x0 + dx. Note that nothing is gained unless
22* larger stopping tolerances are set in the solve for dx
23* (because dx doesn't need to be computed accurately if
24* x0 is very good). The same is true here. BEWARE!
25*
26* 14 Feb 2006: Marc Grunberg <marc@renass.u-strasbg.fr> reports successful
27* use on a sparse system of size 1422805 x 246588 with
28* 76993294 nonzero entries. Also reports that the solution
29* vector isn't initialized(!). Added
30* output->sol_vec->elements[indx] = 0.0;
31* to the first for loop.
32*
33* Presumably you are supposed to provide x0 even if it is zero.
34* Again, BEWARE! I feel it's another reason not to allow x0.
35*
36* 18 Mar 2007: Dima Sorkin <dima.sorkin@gmail.com> reports long strings
37* split across lines (fixed later by Krishna Mohan Gundu's
38* script -- see 30 Aug 2007 below).
39* Also recommends changing %i to %li for long ints.
40*
41* 05 Jul 2007: Joel Erickson <Joel.Erickson@twosigma.com> reports bug in
42* resid_tol and resid_tol_mach used in the stopping rules:
43* output->mat_resid_norm should be
44* output->frob_mat_norm in both.
45*
46* 30 Aug 2007: Krishna Mohan Gundu <gkmohan@gmail.com> also reports
47* long strings split across lines, and provides perl script
48* fix_newlines.pl to change to the form "..." "...".
49* The script is included in the sol/software/lsqr/c/ directory.
50* It has been used to create this version of lsqr.c.
51*
52* Krishna also questioned the fact that the test program
53* seems to fail on the last few rectangular cases.
54* Don't worry! ||A'r|| is in fact quite small.
55* atol and btol have been reduced from 1e-10 to 1e-15
56* to allow more iterations and hence "success".
57*
58* 02 Sep 2007 Macro "sqr" now defined to be "lsqr_sqr".
59*/
60
61#include "LSQR.h"
62#include "stdlib.h"
63
64/*-------------------------------------------------------------------------*/
65/* */
66/* Define the data type allocation and deallocation functions. */
67/* */
68/*-------------------------------------------------------------------------*/
69
70/*---------------------------------------------------------------*/
71/* */
72/* Define the error handling function for LSQR and its */
73/* associated functions. */
74/* */
75/*---------------------------------------------------------------*/
76
77void lsqr_error(const char* msg, int code)
78{
79 fprintf(stderr, "\t%s\n", msg);
80 exit(code);
81}
82
83/*---------------------------------------------------------------*/
84/* */
85/* Define the allocation function for a long vector with */
86/* subscript range x[0..n-1]. */
87/* */
88/*---------------------------------------------------------------*/
89
90lvec* alloc_lvec(long lvec_size)
91{
92 lvec* lng_vec;
93 lng_vec = (lvec*)malloc(sizeof(lvec));
94 if (!lng_vec)
95 lsqr_error("lsqr: vector allocation failure in function "
96 "alloc_lvec()",
97 -1);
98
99 lng_vec->elements = (long*)malloc((lvec_size) * sizeof(long));
100 if (!lng_vec->elements)
101 lsqr_error("lsqr: element allocation failure in "
102 "function alloc_lvec()",
103 -1);
104
105 lng_vec->length = lvec_size;
106
107 return lng_vec;
108}
109
110/*---------------------------------------------------------------*/
111/* */
112/* Define the deallocation function for the vector */
113/* created with the function 'alloc_lvec()'. */
114/* */
115/*---------------------------------------------------------------*/
116
117void free_lvec(lvec* lng_vec)
118{
119 free((double*)(lng_vec->elements));
120 free((lvec*)(lng_vec));
121 return;
122}
123
124/*---------------------------------------------------------------*/
125/* */
126/* Define the allocation function for a double vector with */
127/* subscript range x[0..n-1]. */
128/* */
129/*---------------------------------------------------------------*/
130
131dvec* alloc_dvec(long dvec_size)
132{
133 dvec* dbl_vec;
134
135 dbl_vec = (dvec*)malloc(sizeof(dvec));
136 if (!dbl_vec)
137 lsqr_error("lsqr: vector allocation failure in function "
138 "alloc_dvec()",
139 -1);
140
141 dbl_vec->elements = (double*)malloc((dvec_size) * sizeof(double));
142 if (!dbl_vec->elements)
143 lsqr_error("lsqr: element allocation failure in "
144 "function alloc_dvec()",
145 -1);
146
147 dbl_vec->length = dvec_size;
148
149 return dbl_vec;
150}
151
152/*---------------------------------------------------------------*/
153/* */
154/* Define the deallocation function for the vector */
155/* created with the function 'alloc_dvec()'. */
156/* */
157/*---------------------------------------------------------------*/
158
159void free_dvec(dvec* dbl_vec)
160{
161 free((double*)(dbl_vec->elements));
162 free((dvec*)(dbl_vec));
163 return;
164}
165
166/*---------------------------------------------------------------*/
167/* */
168/* Define the allocation function for all of the structures */
169/* used by the function 'lsqr()'. */
170/* */
171/*---------------------------------------------------------------*/
172
173void alloc_lsqr_mem(lsqr_input** in_struct,
174 lsqr_output** out_struct,
175 lsqr_work** wrk_struct,
176 long max_num_rows,
177 long max_num_cols)
178{
179 *in_struct = (lsqr_input*)alloc_lsqr_in(max_num_rows, max_num_cols);
180 if (!in_struct)
181 lsqr_error("lsqr: input structure allocation failure in "
182 "function alloc_lsqr_in()",
183 -1);
184
185 *out_struct = (lsqr_output*)alloc_lsqr_out(max_num_rows, max_num_cols);
186 if (!out_struct)
187 lsqr_error("lsqr: output structure allocation failure in "
188 "function alloc_lsqr_out()",
189 -1);
190
191 (*out_struct)->sol_vec = (dvec*)(*in_struct)->sol_vec;
192
193 *wrk_struct = (lsqr_work*)alloc_lsqr_wrk(max_num_rows, max_num_cols);
194 if (!wrk_struct)
195 lsqr_error("lsqr: work structure allocation failure in "
196 "function alloc_lsqr_wrk()",
197 -1);
198
199 return;
200}
201
202/*---------------------------------------------------------------*/
203/* */
204/* Define the deallocation function for the structure */
205/* created with the function 'alloc_lsqr_mem()'. */
206/* */
207/*---------------------------------------------------------------*/
208#include <iostream>
209
210void free_lsqr_mem(lsqr_input* in_struct, lsqr_output* out_struct, lsqr_work* wrk_struct)
211{
212 free_lsqr_in(in_struct);
213 free_lsqr_out(out_struct);
214 free_lsqr_wrk(wrk_struct);
215 return;
216}
217
218/*---------------------------------------------------------------*/
219/* */
220/* Define the allocation function for the structure of */
221/* type 'lsqr_input'. */
222/* */
223/*---------------------------------------------------------------*/
224
225lsqr_input* alloc_lsqr_in(long max_num_rows, long max_num_cols)
226{
227 lsqr_input* in_struct;
228
229 in_struct = (lsqr_input*)malloc(sizeof(lsqr_input));
230 if (!in_struct)
231 lsqr_error("lsqr: input structure allocation failure in "
232 "function alloc_lsqr_in()",
233 -1);
234
235 in_struct->rhs_vec = (dvec*)alloc_dvec(max_num_rows);
236 if (!in_struct->rhs_vec)
237 lsqr_error("lsqr: right hand side vector \'b\' "
238 "allocation failure in function alloc_lsqr_in()",
239 -1);
240
241 in_struct->sol_vec = (dvec*)alloc_dvec(max_num_cols);
242 if (!in_struct->sol_vec)
243 lsqr_error("lsqr: solution vector \'x\' allocation "
244 "failure in function alloc_lsqr_in()",
245 -1);
246
247 return in_struct;
248}
249
250/*---------------------------------------------------------------*/
251/* */
252/* Define the deallocation function for the structure */
253/* created with the function 'alloc_lsqr_in()'. */
254/* */
255/*---------------------------------------------------------------*/
256
257void free_lsqr_in(lsqr_input* in_struct)
258{
259 free_dvec(in_struct->rhs_vec);
260 free_dvec(in_struct->sol_vec);
261 free((lsqr_input*)(in_struct));
262 return;
263}
264
265/*---------------------------------------------------------------*/
266/* */
267/* Define the allocation function for the structure of */
268/* type 'lsqr_output'. */
269/* */
270/*---------------------------------------------------------------*/
271
272lsqr_output* alloc_lsqr_out(long __attribute__((unused)) max_num_rows, long max_num_cols)
273{
274 lsqr_output* out_struct;
275
276 out_struct = (lsqr_output*)malloc(sizeof(lsqr_output));
277 if (!out_struct)
278 lsqr_error("lsqr: output structure allocation failure in "
279 "function alloc_lsqr_out()",
280 -1);
281
282 out_struct->std_err_vec = (dvec*)alloc_dvec(max_num_cols);
283 if (!out_struct->std_err_vec)
284 lsqr_error("lsqr: standard error vector \'e\' "
285 "allocation failure in function alloc_lsqr_out()",
286 -1);
287
288 return out_struct;
289}
290
291/*---------------------------------------------------------------*/
292/* */
293/* Define the deallocation function for the structure */
294/* created with the function 'alloc_lsqr_out()'. */
295/* */
296/*---------------------------------------------------------------*/
297
298void free_lsqr_out(lsqr_output* out_struct)
299{
300 free_dvec(out_struct->std_err_vec);
301 free((lsqr_output*)(out_struct));
302 return;
303}
304
305/*---------------------------------------------------------------*/
306/* */
307/* Define the allocation function for the structure of */
308/* type 'lsqr_work'. */
309/* */
310/*---------------------------------------------------------------*/
311
312lsqr_work* alloc_lsqr_wrk(long __attribute__((unused)) max_num_rows, long max_num_cols)
313{
314 lsqr_work* wrk_struct;
315
316 wrk_struct = (lsqr_work*)malloc(sizeof(lsqr_work));
317 if (!wrk_struct)
318 lsqr_error("lsqr: work structure allocation failure in "
319 "function alloc_lsqr_wrk()",
320 -1);
321
322 wrk_struct->bidiag_wrk_vec = (dvec*)alloc_dvec(max_num_cols);
323 if (!wrk_struct->bidiag_wrk_vec)
324 lsqr_error("lsqr: bidiagonalization work "
325 "vector \'v\' allocation failure in function alloc_lsqr_wrk()",
326 -1);
327
328 wrk_struct->srch_dir_vec = (dvec*)alloc_dvec(max_num_cols);
329 if (!wrk_struct->srch_dir_vec)
330 lsqr_error("lsqr: search direction vector \'w\' "
331 "allocation failure in function alloc_lsqr_wrk()",
332 -1);
333
334 return wrk_struct;
335}
336
337/*---------------------------------------------------------------*/
338/* */
339/* Define the deallocation function for the structure */
340/* created with the function 'alloc_lsqr_wrk()'. */
341/* */
342/*---------------------------------------------------------------*/
343
344void free_lsqr_wrk(lsqr_work* wrk_struct)
345{
346 free_dvec(wrk_struct->bidiag_wrk_vec);
347 free_dvec(wrk_struct->srch_dir_vec);
348 free((lsqr_work*)(wrk_struct));
349 return;
350}
351
352/*-------------------------------------------------------------------------*/
353/* */
354/* Define the LSQR function. */
355/* */
356/*-------------------------------------------------------------------------*/
357
358/*
359 *------------------------------------------------------------------------------
360 *
361 * LSQR finds a solution x to the following problems:
362 *
363 * 1. Unsymmetric equations -- solve A*x = b
364 *
365 * 2. Linear least squares -- solve A*x = b
366 * in the least-squares sense
367 *
368 * 3. Damped least squares -- solve ( A )*x = ( b )
369 * ( damp*I ) ( 0 )
370 * in the least-squares sense
371 *
372 * where 'A' is a matrix with 'm' rows and 'n' columns, 'b' is an
373 * 'm'-vector, and 'damp' is a scalar. (All quantities are real.)
374 * The matrix 'A' is intended to be large and sparse.
375 *
376 *
377 * Notation
378 * --------
379 *
380 * The following quantities are used in discussing the subroutine
381 * parameters:
382 *
383 * 'Abar' = ( A ), 'bbar' = ( b )
384 * ( damp*I ) ( 0 )
385 *
386 * 'r' = b - A*x, 'rbar' = bbar - Abar*x
387 *
388 * 'rnorm' = sqrt( norm(r)**2 + damp**2 * norm(x)**2 )
389 * = norm( rbar )
390 *
391 * 'rel_prec' = the relative precision of floating-point arithmetic
392 * on the machine being used. Typically 2.22e-16
393 * with 64-bit arithmetic.
394 *
395 * LSQR minimizes the function 'rnorm' with respect to 'x'.
396 *
397 *
398 * References
399 * ----------
400 *
401 * C.C. Paige and M.A. Saunders, LSQR: An algorithm for sparse
402 * linear equations and sparse least squares,
403 * ACM Transactions on Mathematical Software 8, 1 (March 1982),
404 * pp. 43-71.
405 *
406 * C.C. Paige and M.A. Saunders, Algorithm 583, LSQR: Sparse
407 * linear equations and least-squares problems,
408 * ACM Transactions on Mathematical Software 8, 2 (June 1982),
409 * pp. 195-209.
410 *
411 * C.L. Lawson, R.J. Hanson, D.R. Kincaid and F.T. Krogh,
412 * Basic linear algebra subprograms for Fortran usage,
413 * ACM Transactions on Mathematical Software 5, 3 (Sept 1979),
414 * pp. 308-323 and 324-325.
415 *
416 *------------------------------------------------------------------------------
417 */
418void lsqr(lsqr_input* input,
419 lsqr_output* output,
420 lsqr_work* work,
421 std::function<void(long, dvec*, dvec*, void*)> mat_vec_prod,
422 void* prod)
423{
424 double dvec_norm2(dvec*);
425
426 long indx, term_iter, term_iter_max;
427
428 double alpha, beta, rhobar, phibar, bnorm, bbnorm, cs1, sn1, psi, rho, cs, sn, theta, phi, tau, ddnorm, delta,
429 gammabar, zetabar, gamma, cs2, sn2, zeta, xxnorm, res, resid_tol, cond_tol, resid_tol_mach, temp, stop_crit_1,
430 stop_crit_2, stop_crit_3;
431
432 static char term_msg[8][80] = { "The exact solution is x = x0",
433 "The residual Ax - b is small enough, given ATOL and BTOL",
434 "The least squares error is small enough, given ATOL",
435 "The estimated condition number has exceeded CONLIM",
436 "The residual Ax - b is small enough, given machine precision",
437 "The least squares error is small enough, given machine precision",
438 "The estimated condition number has exceeded machine precision",
439 "The iteration limit has been reached" };
440
441 if (input->lsqr_fp_out != NULL)
442 fprintf(input->lsqr_fp_out,
443 " Least Squares Solution of A*x = b\n"
444 " The matrix A has %7li rows and %7li columns\n"
445 " The damping parameter is\tDAMP = %10.2e\n"
446 " ATOL = %10.2e\t\tCONDLIM = %10.2e\n"
447 " BTOL = %10.2e\t\tITERLIM = %10li\n\n",
448 input->num_rows,
449 input->num_cols,
450 input->damp_val,
451 input->rel_mat_err,
452 input->cond_lim,
453 input->rel_rhs_err,
454 input->max_iter);
455
456 output->term_flag = 0;
457 term_iter = 0;
458
459 output->num_iters = 0;
460
461 output->frob_mat_norm = 0.0;
462 output->mat_cond_num = 0.0;
463 output->sol_norm = 0.0;
464
465 for (indx = 0; indx < input->num_cols; indx++)
466 {
467 work->bidiag_wrk_vec->elements[indx] = 0.0;
468 work->srch_dir_vec->elements[indx] = 0.0;
469 output->std_err_vec->elements[indx] = 0.0;
470 output->sol_vec->elements[indx] = 0.0;
471 }
472
473 bbnorm = 0.0;
474 ddnorm = 0.0;
475 xxnorm = 0.0;
476
477 cs2 = -1.0;
478 sn2 = 0.0;
479 zeta = 0.0;
480 res = 0.0;
481
482 if (input->cond_lim > 0.0)
483 cond_tol = 1.0 / input->cond_lim;
484 else
485 cond_tol = DBL_EPSILON;
486
487 alpha = 0.0;
488 beta = 0.0;
489 /*
490 * Set up the initial vectors u and v for bidiagonalization. These satisfy
491 * the relations
492 * BETA*u = b - A*x0
493 * ALPHA*v = A^T*u
494 */
495 /* Compute b - A*x0 and store in vector u which initially held vector b */
496 dvec_scale((-1.0), input->rhs_vec);
497 mat_vec_prod(0, input->sol_vec, input->rhs_vec, prod);
498 dvec_scale((-1.0), input->rhs_vec);
499
500 /* compute Euclidean length of u and store as BETA */
501 beta = dvec_norm2(input->rhs_vec);
502
503 if (beta > 0.0)
504 {
505 /* scale vector u by the inverse of BETA */
506 dvec_scale((1.0 / beta), input->rhs_vec);
507
508 /* Compute matrix-vector product A^T*u and store it in vector v */
509 mat_vec_prod(1, work->bidiag_wrk_vec, input->rhs_vec, prod);
510
511 /* compute Euclidean length of v and store as ALPHA */
512 alpha = dvec_norm2(work->bidiag_wrk_vec);
513 }
514
515 if (alpha > 0.0)
516 {
517 /* scale vector v by the inverse of ALPHA */
518 dvec_scale((1.0 / alpha), work->bidiag_wrk_vec);
519
520 /* copy vector v to vector w */
522 }
523
524 output->mat_resid_norm = alpha * beta;
525 output->resid_norm = beta;
526 bnorm = beta;
527 /*
528 * If the norm || A^T r || is zero, then the initial guess is the exact
529 * solution. Exit and report this.
530 */
531 if ((output->mat_resid_norm == 0.0) && (input->lsqr_fp_out != NULL))
532 {
533 fprintf(input->lsqr_fp_out,
534 "\tISTOP = %3li\t\t\tITER = %9li\n"
535 " || A ||_F = %13.5e\tcond( A ) = %13.5e\n"
536 " || r ||_2 = %13.5e\t|| A^T r ||_2 = %13.5e\n"
537 " || b ||_2 = %13.5e\t|| x - x0 ||_2 = %13.5e\n\n",
538 output->term_flag,
539 output->num_iters,
540 output->frob_mat_norm,
541 output->mat_cond_num,
542 output->resid_norm,
543 output->mat_resid_norm,
544 bnorm,
545 output->sol_norm);
546
547 fprintf(input->lsqr_fp_out, " %s\n\n", term_msg[output->term_flag]);
548
549 return;
550 }
551
552 rhobar = alpha;
553 phibar = beta;
554 /*
555 * If statistics are printed at each iteration, print a header and the initial
556 * values for each quantity.
557 */
558 if (input->lsqr_fp_out != NULL)
559 {
560 fprintf(input->lsqr_fp_out,
561 " ITER || r || Compatible "
562 "||A^T r|| / ||A|| ||r|| || A || cond( A )\n\n");
563
564 stop_crit_1 = 1.0;
565 stop_crit_2 = alpha / beta;
566
567 fprintf(input->lsqr_fp_out,
568 "%6li %13.5e %10.2e \t%10.2e \t%10.2e %10.2e\n",
569 output->num_iters,
570 output->resid_norm,
571 stop_crit_1,
572 stop_crit_2,
573 output->frob_mat_norm,
574 output->mat_cond_num);
575 }
576
577 /*
578 * The main iteration loop is continued as long as no stopping criteria
579 * are satisfied and the number of total iterations is less than some upper
580 * bound.
581 */
582 while (output->term_flag == 0)
583 {
584 output->num_iters++;
585 /*
586 * Perform the next step of the bidiagonalization to obtain
587 * the next vectors u and v, and the scalars ALPHA and BETA.
588 * These satisfy the relations
589 * BETA*u = A*v - ALPHA*u,
590 * ALFA*v = A^T*u - BETA*v.
591 */
592 /* scale vector u by the negative of ALPHA */
593 dvec_scale((-alpha), input->rhs_vec);
594
595 /* compute A*v - ALPHA*u and store in vector u */
596 mat_vec_prod(0, work->bidiag_wrk_vec, input->rhs_vec, prod);
597
598 /* compute Euclidean length of u and store as BETA */
599 beta = dvec_norm2(input->rhs_vec);
600
601 /* accumulate this quantity to estimate Frobenius norm of matrix A */
602 /* bbnorm += sqr(alpha) + sqr(beta) + sqr(input->damp_val);*/
603 bbnorm += alpha * alpha + beta * beta + input->damp_val * input->damp_val;
604
605 if (beta > 0.0)
606 {
607 /* scale vector u by the inverse of BETA */
608 dvec_scale((1.0 / beta), input->rhs_vec);
609
610 /* scale vector v by the negative of BETA */
611 dvec_scale((-beta), work->bidiag_wrk_vec);
612
613 /* compute A^T*u - BETA*v and store in vector v */
614 mat_vec_prod(1, work->bidiag_wrk_vec, input->rhs_vec, prod);
615
616 /* compute Euclidean length of v and store as ALPHA */
617 alpha = dvec_norm2(work->bidiag_wrk_vec);
618
619 if (alpha > 0.0)
620 /* scale vector v by the inverse of ALPHA */
621 dvec_scale((1.0 / alpha), work->bidiag_wrk_vec);
622 }
623 /*
624 * Use a plane rotation to eliminate the damping parameter.
625 * This alters the diagonal (RHOBAR) of the lower-bidiagonal matrix.
626 */
627 cs1 = rhobar / sqrt(lsqr_sqr(rhobar) + lsqr_sqr(input->damp_val));
628 sn1 = input->damp_val / sqrt(lsqr_sqr(rhobar) + lsqr_sqr(input->damp_val));
629
630 psi = sn1 * phibar;
631 phibar = cs1 * phibar;
632 /*
633 * Use a plane rotation to eliminate the subdiagonal element (BETA)
634 * of the lower-bidiagonal matrix, giving an upper-bidiagonal matrix.
635 */
636 rho = sqrt(lsqr_sqr(rhobar) + lsqr_sqr(input->damp_val) + lsqr_sqr(beta));
637 cs = sqrt(lsqr_sqr(rhobar) + lsqr_sqr(input->damp_val)) / rho;
638 sn = beta / rho;
639
640 theta = sn * alpha;
641 rhobar = -cs * alpha;
642 phi = cs * phibar;
643 phibar = sn * phibar;
644 tau = sn * phi;
645 /*
646 * Update the solution vector x, the search direction vector w, and the
647 * standard error estimates vector se.
648 */
649 for (indx = 0; indx < input->num_cols; indx++)
650 {
651 /* update the solution vector x */
652 output->sol_vec->elements[indx] += (phi / rho) * work->srch_dir_vec->elements[indx];
653
654 /* update the standard error estimates vector se */
655 output->std_err_vec->elements[indx] += lsqr_sqr((1.0 / rho) * work->srch_dir_vec->elements[indx]);
656
657 /* accumulate this quantity to estimate condition number of A
658 */
659 ddnorm += lsqr_sqr((1.0 / rho) * work->srch_dir_vec->elements[indx]);
660
661 /* update the search direction vector w */
662 work->srch_dir_vec->elements[indx] =
663 work->bidiag_wrk_vec->elements[indx] - (theta / rho) * work->srch_dir_vec->elements[indx];
664 }
665 /*
666 * Use a plane rotation on the right to eliminate the super-diagonal element
667 * (THETA) of the upper-bidiagonal matrix. Then use the result to estimate
668 * the solution norm || x ||.
669 */
670 delta = sn2 * rho;
671 gammabar = -cs2 * rho;
672 zetabar = (phi - delta * zeta) / gammabar;
673
674 /* compute an estimate of the solution norm || x || */
675 output->sol_norm = sqrt(xxnorm + lsqr_sqr(zetabar));
676
677 gamma = sqrt(lsqr_sqr(gammabar) + lsqr_sqr(theta));
678 cs2 = gammabar / gamma;
679 sn2 = theta / gamma;
680 zeta = (phi - delta * zeta) / gamma;
681
682 /* accumulate this quantity to estimate solution norm || x || */
683 xxnorm += lsqr_sqr(zeta);
684 /*
685 * Estimate the Frobenius norm and condition of the matrix A, and the
686 * Euclidean norms of the vectors r and A^T*r.
687 */
688 output->frob_mat_norm = sqrt(bbnorm);
689 output->mat_cond_num = output->frob_mat_norm * sqrt(ddnorm);
690
691 res += lsqr_sqr(psi);
692 output->resid_norm = sqrt(lsqr_sqr(phibar) + res);
693
694 output->mat_resid_norm = alpha * fabs(tau);
695 /*
696 * Use these norms to estimate the values of the three stopping criteria.
697 */
698 stop_crit_1 = output->resid_norm / bnorm;
699
700 stop_crit_2 = 0.0;
701 if (output->resid_norm > 0.0)
702 stop_crit_2 = output->mat_resid_norm / (output->frob_mat_norm * output->resid_norm);
703
704 stop_crit_3 = 1.0 / output->mat_cond_num;
705
706 /* 05 Jul 2007: Bug reported by Joel Erickson <Joel.Erickson@twosigma.com>.
707 */
708 resid_tol = input->rel_rhs_err + input->rel_mat_err *
709 output->frob_mat_norm * /* (not output->mat_resid_norm *) */
710 output->sol_norm / bnorm;
711
712 resid_tol_mach = DBL_EPSILON + DBL_EPSILON * output->frob_mat_norm * /* (not output->mat_resid_norm *) */
713 output->sol_norm / bnorm;
714 /*
715 * Check to see if any of the stopping criteria are satisfied.
716 * First compare the computed criteria to the machine precision.
717 * Second compare the computed criteria to the the user specified precision.
718 */
719 /* iteration limit reached */
720 if (output->num_iters >= input->max_iter)
721 output->term_flag = 7;
722
723 /* condition number greater than machine precision */
724 if (stop_crit_3 <= DBL_EPSILON)
725 output->term_flag = 6;
726 /* least squares error less than machine precision */
727 if (stop_crit_2 <= DBL_EPSILON)
728 output->term_flag = 5;
729 /* residual less than a function of machine precision */
730 if (stop_crit_1 <= resid_tol_mach)
731 output->term_flag = 4;
732
733 /* condition number greater than CONLIM */
734 if (stop_crit_3 <= cond_tol)
735 output->term_flag = 3;
736 /* least squares error less than ATOL */
737 if (stop_crit_2 <= input->rel_mat_err)
738 output->term_flag = 2;
739 /* residual less than a function of ATOL and BTOL */
740 if (stop_crit_1 <= resid_tol)
741 output->term_flag = 1;
742 /*
743 * If statistics are printed at each iteration, print a header and the initial
744 * values for each quantity.
745 */
746 if (input->lsqr_fp_out != NULL)
747 {
748 fprintf(input->lsqr_fp_out,
749 "%6li %13.5e %10.2e \t%10.2e \t%10.2e %10.2e\n",
750 output->num_iters,
751 output->resid_norm,
752 stop_crit_1,
753 stop_crit_2,
754 output->frob_mat_norm,
755 output->mat_cond_num);
756 }
757 /*
758 * The convergence criteria are required to be met on NCONV consecutive
759 * iterations, where NCONV is set below. Suggested values are 1, 2, or 3.
760 */
761 if (output->term_flag == 0)
762 term_iter = -1;
763
764 term_iter_max = 1;
765 term_iter++;
766
767 if ((term_iter < term_iter_max) && (output->num_iters < input->max_iter))
768 output->term_flag = 0;
769 } /* end while loop */
770 /*
771 * Finish computing the standard error estimates vector se.
772 */
773 temp = 1.0;
774
775 if (input->num_rows > input->num_cols)
776 temp = (double)(input->num_rows - input->num_cols);
777
778 if (lsqr_sqr(input->damp_val) > 0.0)
779 temp = (double)(input->num_rows);
780
781 temp = output->resid_norm / sqrt(temp);
782
783 for (indx = 0; indx < input->num_cols; indx++)
784 /* update the standard error estimates vector se */
785 output->std_err_vec->elements[indx] = temp * sqrt(output->std_err_vec->elements[indx]);
786 /*
787 * If statistics are printed at each iteration, print the statistics for the
788 * stopping condition.
789 */
790 if (input->lsqr_fp_out != NULL)
791 {
792 fprintf(input->lsqr_fp_out,
793 "\n\tISTOP = %3li\t\t\tITER = %9li\n"
794 " || A ||_F = %13.5e\tcond( A ) = %13.5e\n"
795 " || r ||_2 = %13.5e\t|| A^T r ||_2 = %13.5e\n"
796 " || b ||_2 = %13.5e\t|| x - x0 ||_2 = %13.5e\n\n",
797 output->term_flag,
798 output->num_iters,
799 output->frob_mat_norm,
800 output->mat_cond_num,
801 output->resid_norm,
802 output->mat_resid_norm,
803 bnorm,
804 output->sol_norm);
805
806 fprintf(input->lsqr_fp_out, " %s\n\n", term_msg[output->term_flag]);
807 }
808
809 return;
810}
811
812/*-------------------------------------------------------------------------*/
813/* */
814/* Define the function 'dvec_norm2()'. This function takes a vector */
815/* arguement and computes the Euclidean or L2 norm of this vector. Note */
816/* that this is a version of the BLAS function 'dnrm2()' rewritten to */
817/* use the current data structures. */
818/* */
819/*-------------------------------------------------------------------------*/
820
821double dvec_norm2(dvec* vec)
822{
823 long indx;
824 double norm;
825
826 norm = 0.0;
827
828 for (indx = 0; indx < vec->length; indx++)
829 norm += lsqr_sqr(vec->elements[indx]);
830
831 return sqrt(norm);
832}
833
834/*-------------------------------------------------------------------------*/
835/* */
836/* Define the function 'dvec_scale()'. This function takes a vector */
837/* and a scalar as arguments and multiplies each element of the vector */
838/* by the scalar. Note that this is a version of the BLAS function */
839/* 'dscal()' rewritten to use the current data structures. */
840/* */
841/*-------------------------------------------------------------------------*/
842
843void dvec_scale(double scal, dvec* vec)
844{
845 long indx;
846
847 for (indx = 0; indx < vec->length; indx++)
848 vec->elements[indx] *= scal;
849
850 return;
851}
852
853/*-------------------------------------------------------------------------*/
854/* */
855/* Define the function 'dvec_copy()'. This function takes two vectors */
856/* as arguements and copies the contents of the first into the second. */
857/* Note that this is a version of the BLAS function 'dcopy()' rewritten */
858/* to use the current data structures. */
859/* */
860/*-------------------------------------------------------------------------*/
861
862void dvec_copy(dvec* orig, dvec* copy)
863{
864 long indx;
865
866 for (indx = 0; indx < orig->length; indx++)
867 copy->elements[indx] = orig->elements[indx];
868
869 return;
870}
void free_lvec(lvec *lng_vec)
Definition LSQR.cxx:117
dvec * alloc_dvec(long dvec_size)
Definition LSQR.cxx:131
void lsqr(lsqr_input *input, lsqr_output *output, lsqr_work *work, std::function< void(long, dvec *, dvec *, void *)> mat_vec_prod, void *prod)
Definition LSQR.cxx:418
lsqr_work * alloc_lsqr_wrk(long __attribute__((unused)) max_num_rows, long max_num_cols)
Definition LSQR.cxx:312
void dvec_copy(dvec *orig, dvec *copy)
Definition LSQR.cxx:862
void alloc_lsqr_mem(lsqr_input **in_struct, lsqr_output **out_struct, lsqr_work **wrk_struct, long max_num_rows, long max_num_cols)
Definition LSQR.cxx:173
void free_lsqr_wrk(lsqr_work *wrk_struct)
Definition LSQR.cxx:344
void free_dvec(dvec *dbl_vec)
Definition LSQR.cxx:159
void free_lsqr_out(lsqr_output *out_struct)
Definition LSQR.cxx:298
double dvec_norm2(dvec *vec)
Definition LSQR.cxx:821
lsqr_output * alloc_lsqr_out(long __attribute__((unused)) max_num_rows, long max_num_cols)
Definition LSQR.cxx:272
void free_lsqr_mem(lsqr_input *in_struct, lsqr_output *out_struct, lsqr_work *wrk_struct)
Definition LSQR.cxx:210
lvec * alloc_lvec(long lvec_size)
Definition LSQR.cxx:90
void lsqr_error(const char *msg, int code)
Definition LSQR.cxx:77
void dvec_scale(double scal, dvec *vec)
Definition LSQR.cxx:843
lsqr_input * alloc_lsqr_in(long max_num_rows, long max_num_cols)
Definition LSQR.cxx:225
void free_lsqr_in(lsqr_input *in_struct)
Definition LSQR.cxx:257
struct LSQR_LONG_VECTOR lvec
struct LSQR_OUTPUTS lsqr_output
struct LSQR_WORK lsqr_work
#define lsqr_sqr(a)
Definition LSQR.h:83
struct LSQR_INPUTS lsqr_input
struct LSQR_DOUBLE_VECTOR dvec
double * elements
Definition LSQR.h:113
dvec * rhs_vec
Definition LSQR.h:203
FILE * lsqr_fp_out
Definition LSQR.h:202
long num_rows
Definition LSQR.h:195
double damp_val
Definition LSQR.h:197
long num_cols
Definition LSQR.h:196
double cond_lim
Definition LSQR.h:200
dvec * sol_vec
Definition LSQR.h:204
long max_iter
Definition LSQR.h:201
double rel_rhs_err
Definition LSQR.h:199
double rel_mat_err
Definition LSQR.h:198
long * elements
Definition LSQR.h:107
long term_flag
Definition LSQR.h:304
double sol_norm
Definition LSQR.h:310
long num_iters
Definition LSQR.h:305
double resid_norm
Definition LSQR.h:308
dvec * std_err_vec
Definition LSQR.h:312
dvec * sol_vec
Definition LSQR.h:311
double mat_cond_num
Definition LSQR.h:307
double mat_resid_norm
Definition LSQR.h:309
double frob_mat_norm
Definition LSQR.h:306
dvec * bidiag_wrk_vec
Definition LSQR.h:335
dvec * srch_dir_vec
Definition LSQR.h:336