ECCE @ EIC Software
NewtonMinimizerGradHessian.cpp
The newest version of this file is available in the sPHENIX GitHub repository.
#include "NewtonMinimizerGradHessian.h"

#include "FunctionGradHessian.h"
#include "SquareGradient.h"

#include <Eigen/LU>

#include <algorithm>  // for fill
#include <cmath>
#include <iostream>
#include <utility>
12 
13 using namespace std;
14 using namespace Eigen;
15 
16 namespace FitNewton
17 {
18  NewtonMinimizerGradHessian::NewtonMinimizerGradHessian()
19  {
20 
21  }
22 
23 
24  NewtonMinimizerGradHessian::~NewtonMinimizerGradHessian()
25  {
26 
27  }
28 
29 
30  void NewtonMinimizerGradHessian::setFunction(FunctionGradHessian* func)
31  {
32  function=func;
33  fixparameter.clear();
34  fixparameter.assign(func->nPars(), 0);
35  }
36 
37 
38  void NewtonMinimizerGradHessian::fixParameter(unsigned int par)
39  {
40  if(par < fixparameter.size())
41  {
42  fixparameter[par] = 1;
43  }
44  }
45 
46 
47  void NewtonMinimizerGradHessian::unfixParameter(unsigned int par)
48  {
49  if(par < fixparameter.size())
50  {
51  fixparameter[par] = 0;
52  }
53  }
54 
55 
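  // "zoom" phase of the strong Wolfe line search (cf. the zoom procedure in
  // Nocedal & Wright, "Numerical Optimization"): given a bracketing interval
  // [lo, hi] known to contain acceptable step lengths, repeatedly bisect it
  // and test the trial step alpha against
  //   sufficient decrease:  f(x0 + alpha*d) <= f(x0) + wolfe1*alpha*(grad0 . d)
  //   curvature:            |grad(x0 + alpha*d) . d| <= wolfe2*|grad0 . d|
  // where d is the search direction and grad0_dir = grad0 . d. The accepted
  // step is written to `result`; false is returned if max_iter bisections
  // fail to produce one.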
  bool NewtonMinimizerGradHessian::zoom(const double& wolfe1, const double& wolfe2, double& lo, double& hi, VectorXd& try_grad, VectorXd& direction, double& grad0_dir, double& val0, double& val_lo, VectorXd& init_params, VectorXd& try_params, unsigned int max_iter, double& result)
  {
    double tryval = val0;
    double alpha = tryval;
    double temp1 = tryval;
    double temp2 = tryval;
    double temp3 = tryval;
    bool bounds = true;

    unsigned int counter = 1;
    while (true)
    {
      // bisect the bracketing interval
      alpha = lo + hi;
      alpha *= 0.5;
      try_params = direction;
      try_params *= alpha;
      try_params += init_params;
      bounds = function->calcValGrad(try_params, tryval, try_grad);
      for (unsigned int i = 0; i < fixparameter.size(); ++i)
      {
        if (fixparameter[i] != 0)
        {
          try_grad[i] = 0.;
        }
      }
      // sufficient-decrease threshold: val0 + wolfe1*alpha*grad0_dir
      temp1 = wolfe1 * alpha;
      temp1 *= grad0_dir;
      temp1 += val0;

      if ((tryval > temp1) || (tryval >= val_lo) || (bounds == false))
      {
        // if( (fabs((tryval - val_lo)/(tryval)) < 1.0e-4) && (tryval < val_lo) ){result = alpha;return true;}
        if (fabs((tryval - val_lo) / (tryval)) < 1.0e-4) { result = alpha; return true; }
        hi = alpha;
      }
      else
      {
        temp1 = try_grad.dot(direction);
        temp2 = -wolfe2 * grad0_dir;
        temp3 = fabs(temp1);

        // curvature condition satisfied: accept this step
        if (temp3 <= fabs(temp2))
        {
          result = alpha;
          return bounds;
        }
        temp3 = hi - lo;
        temp1 *= temp3;
        if (temp1 >= 0.)
        {
          hi = lo;
        }
        lo = alpha;
        val_lo = tryval;
      }
      counter++;
      if (counter > max_iter) { return false; }
    }
  }

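  // Bracketing phase of the strong Wolfe line search: starting from the
  // initial guess `alpha`, grow the step by factors of 2 until either both
  // Wolfe conditions hold (accept alpha), a bracket around an acceptable step
  // is found (hand off to zoom()), or max_iter is exceeded. If a trial point
  // leaves the function's domain (calcValGrad returns false), alpha is halved
  // until a valid point below val0 is found.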
  bool NewtonMinimizerGradHessian::lineSearch(double& alpha, const double& wolfe1, const double& wolfe2, VectorXd& try_grad, VectorXd& direction, double& grad0_dir, double& val0, VectorXd& init_params, VectorXd& try_params, const double& /*precision*/, const double& /*accuracy*/, unsigned int max_iter, double& result)
  {
    double tryval = val0;
    double prev_val = tryval;
    double prev_alpha = tryval;
    double lo = tryval;
    double hi = tryval;
    double temp1 = tryval;
    double temp2 = tryval;
    double temp3 = tryval;
    unsigned int i = 1;
    bool bounds = true;

    prev_alpha = 0.;
    while (true)
    {
      try_params = direction;
      try_params *= alpha;
      try_params += init_params;

      bounds = function->calcValGrad(try_params, tryval, try_grad);
      for (unsigned int j = 0; j < fixparameter.size(); ++j)
      {
        if (fixparameter[j] != 0)
        {
          try_grad[j] = 0.;
        }
      }
      if (bounds == false)
      {
        // trial point fell outside the function's domain:
        // halve alpha until a valid point below val0 is found
        while (true)
        {
          alpha *= 0.5;
          try_params = direction;
          try_params *= alpha;
          try_params += init_params;

          bounds = function->calcValGrad(try_params, tryval, try_grad);
          for (unsigned int j = 0; j < fixparameter.size(); ++j)
          {
            if (fixparameter[j] != 0)
            {
              try_grad[j] = 0.;
            }
          }
          if (bounds == true)
          {
            if (tryval < val0)
            {
              result = alpha;
              return true;
            }
          }
          if (i > max_iter) { return false; }
          i++;
        }

        // (not reached: the recovery loop above exits only by returning)
        alpha = 0.5 * (prev_alpha + alpha);
        i++;
        if (i > max_iter) { return false; }
        continue;
      }

      // sufficient-decrease check; on failure [prev_alpha, alpha] brackets an acceptable step
      temp1 = wolfe1 * alpha;
      temp1 *= grad0_dir;
      temp1 += val0;
      if ((tryval > temp1) || ((tryval > prev_val) && (i > 1)))
      {
        lo = prev_alpha;
        hi = alpha;
        return zoom(wolfe1, wolfe2, lo, hi, try_grad, direction, grad0_dir, val0, prev_val, init_params, try_params, max_iter, result);
      }
      temp1 = try_grad.dot(direction);
      temp2 = -wolfe2 * grad0_dir;
      temp3 = fabs(temp1);

      // curvature condition satisfied: accept this step
      if (temp3 <= fabs(temp2))
      {
        result = alpha;
        return bounds;
      }
      // directional derivative turned positive: [alpha, prev_alpha] brackets an acceptable step
      if (temp1 >= 0.)
      {
        lo = alpha;
        hi = prev_alpha;
        return zoom(wolfe1, wolfe2, lo, hi, try_grad, direction, grad0_dir, val0, tryval, init_params, try_params, max_iter, result);
      }
      prev_val = tryval;
      prev_alpha = alpha;
      alpha *= 2.;
      i++;
      if (i > max_iter) { return false; }
    }
  }

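  // Find a stationary point of the function (grad f = 0), which need not be a
  // minimum (e.g. a saddle point). Each iteration takes the Newton root-finding
  // step H * move = -grad f on the original function, but the step length is
  // chosen by a strong Wolfe line search on |grad f|^2 (via the SquareGradient
  // wrapper), which is minimized exactly at stationary points of f.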
  bool NewtonMinimizerGradHessian::findSaddlePoint(const VectorXd& start_point, VectorXd& min_point, double tol, unsigned int max_iter, double abs_tol)
  {
    try
    {
      if (!function) { throw (char*) ("minimize called but function has not been set"); }
    }
    catch (char* str)
    {
      cout << "Exception from NewtonMinimizerGradHessian: " << str << endl;
      throw;
    }

    unsigned int npars = function->nPars();

    try
    {
      if (!(npars > 0)) { throw (char*) ("function to be minimized has zero dimensions"); }
      if (start_point.rows() != (int) npars) { throw (char*) ("input to minimizer not of dimension required by function to be minimized"); }
    }
    catch (char* str)
    {
      cout << "Exception from NewtonMinimizerGradHessian: " << str << endl;
      throw;
    }

    // parameters used for the Wolfe conditions
    double c1 = 1.0e-6;
    double c2 = 1.0e-1;

    VectorXd working_points[2];
    working_points[0] = VectorXd::Zero(npars);
    working_points[1] = VectorXd::Zero(npars);

    VectorXd* current_point = &(working_points[0]);
    VectorXd* try_point = &(working_points[1]);

    (*current_point) = start_point;

    double value = 0.;
    double prev_value = 0.;

    double dir_der = 0.;
    double scale = 1.;
    double scale_temp = 1.;
    double grad0_dir = 0.;

    bool good_value = true;
    bool bounds = true;

    unsigned int search_iter = 64;

    VectorXd grad[2];
    grad[0] = VectorXd::Zero(npars);
    grad[1] = VectorXd::Zero(npars);
    VectorXd* current_grad = &(grad[0]);
    VectorXd* try_grad = &(grad[1]);

    VectorXd newgrad = VectorXd::Zero(npars);

    MatrixXd hessian = MatrixXd::Zero(npars, npars);

    VectorXd move = VectorXd::Zero(npars);
    VectorXd unit_move = VectorXd::Zero(npars);

    // line-search against |grad f|^2 instead of f itself
    FunctionGradHessian* orig_func = function;
    SquareGradient gradsquared(orig_func);
    function = &gradsquared;

    // try a Newton iteration
    orig_func->calcValGradHessian((*current_point), value, (*current_grad), hessian);
    move = -hessian.fullPivLu().solve(*current_grad);
    gradsquared.calcValGrad((*current_point), value, newgrad);
    good_value = true;
    // move(i) == move(i) is false only for NaN: fall back to steepest descent
    for (unsigned int i = 0; i < npars; ++i) { if (!(move(i) == move(i))) { good_value = false; break; } }
    if (good_value == false) { move = -newgrad; }
    dir_der = newgrad.dot(move);
    if (dir_der > 0.) { move = -move; }
    gradsquared.rescaleMove((*current_point), move);
    grad0_dir = newgrad.dot(move);
    scale_temp = 1.;
    // find scale, such that move*scale satisfies the strong Wolfe conditions
    bounds = lineSearch(scale_temp, c1, c2, (*try_grad), move, grad0_dir, value, (*current_point), (*try_point), tol, abs_tol, search_iter, scale);
    if (bounds == false) { min_point = start_point; function = orig_func; return false; }
    move *= scale;
    (*try_point) = ((*current_point) + move);
    orig_func->calcValGradHessian((*try_point), prev_value, (*try_grad), hessian);
    gradsquared.calcValGrad((*try_point), prev_value, newgrad);
    swap(current_point, try_point);
    swap(current_grad, try_grad);
    swap(value, prev_value);

    unsigned long int count = 1;
    bool converged = false;
    while (converged == false)
    {
      // stop when the relative or absolute change in the value is small enough
      if (fabs((prev_value - value) / prev_value) < tol || fabs(prev_value - value) < abs_tol) { converged = true; break; }
      prev_value = value;
      // try a Newton iteration
      move = -hessian.fullPivLu().solve(*current_grad);
      gradsquared.calcValGrad((*current_point), value, newgrad);

      good_value = true;
      for (unsigned int i = 0; i < npars; ++i) { if (!(move(i) == move(i))) { good_value = false; break; } }
      scale_temp = 1.;
      scale_temp = fabs(value / newgrad.dot(move));
      if (good_value == false) { move = -newgrad; scale_temp = fabs(value / move.dot(move)); }
      dir_der = newgrad.dot(move);
      if (dir_der > 0.) { move = -move; }
      gradsquared.rescaleMove((*current_point), move);
      grad0_dir = newgrad.dot(move);
      // find scale, such that move*scale satisfies the strong Wolfe conditions
      bounds = lineSearch(scale_temp, c1, c2, (*try_grad), move, grad0_dir, value, (*current_point), (*try_point), tol, abs_tol, search_iter, scale);
      if (bounds == false) { min_point = (*current_point); function = orig_func; return false; }
      move *= scale;
      (*try_point) = ((*current_point) + move);
      orig_func->calcValGradHessian((*try_point), value, (*try_grad), hessian);
      gradsquared.calcValGrad((*try_point), value, newgrad);
      swap(current_point, try_point);
      swap(current_grad, try_grad);
      count++;
      if (count > max_iter) { break; }
    }
    orig_func->computeCovariance(value, hessian);
    min_point = (*current_point);
    function = orig_func;
    return converged;
  }

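  // Minimize the function with a damped Newton method: at each iterate solve
  // H * move = -grad for the Newton direction (falling back to steepest
  // descent if the solve produces NaNs, and negating the direction if it is
  // not a descent direction), then choose the step length with a strong Wolfe
  // line search (c1 = 1e-4, c2 = 0.9). Fixed parameters are frozen by zeroing
  // their gradient entries and replacing their Hessian rows/columns with the
  // identity. Convergence is declared when the relative or absolute change in
  // the function value drops below tol or abs_tol.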
  bool NewtonMinimizerGradHessian::minimize(const VectorXd& start_point, VectorXd& min_point, double tol, unsigned int max_iter, double abs_tol)
  {
    try
    {
      if (!function) { throw (char*) ("minimize called but function has not been set"); }
    }
    catch (char* str)
    {
      cout << "Exception from NewtonMinimizerGradHessian: " << str << endl;
      throw;
    }

    unsigned int npars = function->nPars();

    try
    {
      if (!(npars > 0)) { throw (char*) ("function to be minimized has zero dimensions"); }
      if (start_point.rows() != (int) npars) { throw (char*) ("input to minimizer not of dimension required by function to be minimized"); }
    }
    catch (char* str)
    {
      cout << "Exception from NewtonMinimizerGradHessian: " << str << endl;
      throw;
    }

    // parameters used for the Wolfe conditions
    double c1 = 1.0e-4;
    double c2 = 0.9;

    VectorXd working_points[2];
    working_points[0] = VectorXd::Zero(npars);
    working_points[1] = VectorXd::Zero(npars);

    VectorXd* current_point = &(working_points[0]);
    VectorXd* try_point = &(working_points[1]);

    (*current_point) = start_point;

    double value = 0.;
    double prev_value = 0.;

    double dir_der = 0.;
    double scale = 1.;
    double scale_temp = 1.;
    double grad0_dir = 0.;

    bool good_value = true;
    bool bounds = true;

    unsigned int search_iter = 32;

    VectorXd grad[2];
    grad[0] = VectorXd::Zero(npars);
    grad[1] = VectorXd::Zero(npars);
    VectorXd* current_grad = &(grad[0]);
    VectorXd* try_grad = &(grad[1]);

    MatrixXd hessian = MatrixXd::Zero(npars, npars);

    VectorXd move = VectorXd::Zero(npars);
    VectorXd unit_move = VectorXd::Zero(npars);

    // try a Newton iteration
    function->calcValGradHessian((*current_point), value, (*current_grad), hessian);
    // freeze fixed parameters in the gradient and Hessian
    for (unsigned int i = 0; i < fixparameter.size(); ++i)
    {
      if (fixparameter[i] != 0)
      {
        (*current_grad)[i] = 0.;
        for (unsigned int j = 0; j < npars; ++j)
        {
          hessian(j, i) = 0.;
          hessian(i, j) = 0.;
        }
        hessian(i, i) = 1.;
      }
    }

    move = -hessian.fullPivLu().solve(*current_grad);
    good_value = true;
    // move(i) == move(i) is false only for NaN: fall back to steepest descent
    for (unsigned int i = 0; i < npars; ++i) { if (!(move(i) == move(i))) { good_value = false; break; } }
    if (good_value == false) { move = -(*current_grad); }
    dir_der = (*current_grad).dot(move);
    // if the inverse hessian times the negative gradient isn't even a descent direction, negate the direction
    if (dir_der > 0.)
    {
      move = -move;
    }
    function->rescaleMove((*current_point), move);
    grad0_dir = (*current_grad).dot(move);
    scale_temp = 1.;
    // find scale, such that move*scale satisfies the strong Wolfe conditions
    bounds = lineSearch(scale_temp, c1, c2, (*try_grad), move, grad0_dir, value, (*current_point), (*try_point), tol, abs_tol, search_iter, scale);
    if (bounds == false) { min_point = start_point; return false; }
    move *= scale;
    (*try_point) = ((*current_point) + move);
    function->calcValGradHessian((*try_point), prev_value, (*try_grad), hessian);
    for (unsigned int i = 0; i < fixparameter.size(); ++i)
    {
      if (fixparameter[i] != 0)
      {
        (*try_grad)[i] = 0.;
        for (unsigned int j = 0; j < npars; ++j)
        {
          hessian(j, i) = 0.;
          hessian(i, j) = 0.;
        }
        hessian(i, i) = 1.;
      }
    }
    swap(current_point, try_point);
    swap(current_grad, try_grad);
    swap(value, prev_value);

    unsigned long int count = 1;
    bool converged = false;
    while (converged == false)
    {
      // stop when the relative or absolute change in the value is small enough
      if (fabs((prev_value - value) / prev_value) < tol || fabs(prev_value - value) < abs_tol) { converged = true; break; }
      prev_value = value;
      // try a Newton iteration
      move = -hessian.fullPivLu().solve(*current_grad);
      good_value = true;
      for (unsigned int i = 0; i < npars; ++i) { if (!(move(i) == move(i))) { good_value = false; break; } }
      if (good_value == false) { move = -(*current_grad); }
      dir_der = (*current_grad).dot(move);
      // if the inverse hessian times the negative gradient isn't even a descent direction, negate the direction
      if (dir_der > 0.)
      {
        move = -move;
      }
      function->rescaleMove((*current_point), move);
      grad0_dir = (*current_grad).dot(move);
      scale_temp = 1.;
      // find scale, such that move*scale satisfies the strong Wolfe conditions
      // scale_temp = fabs(value/grad0_dir);
      bounds = lineSearch(scale_temp, c1, c2, (*try_grad), move, grad0_dir, value, (*current_point), (*try_point), tol, abs_tol, search_iter, scale);
      if (bounds == false) { min_point = (*current_point); return false; }
      move *= scale;
      (*try_point) = ((*current_point) + move);
      function->calcValGradHessian((*try_point), value, (*try_grad), hessian);
      for (unsigned int i = 0; i < fixparameter.size(); ++i)
      {
        if (fixparameter[i] != 0)
        {
          (*try_grad)[i] = 0.;
          for (unsigned int j = 0; j < npars; ++j)
          {
            hessian(j, i) = 0.;
            hessian(i, j) = 0.;
          }
          hessian(i, i) = 1.;
        }
      }
      swap(current_point, try_point);
      swap(current_grad, try_grad);

      count++;
      if (count > max_iter) { break; }
    }
    function->computeCovariance(value, hessian);

    min_point = (*current_point);
    return converged;
  }
}
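A minimal driver sketch for this minimizer. It assumes FunctionGradHessian is declared in the FitNewton namespace (as its unqualified use in this file suggests) and that `chisq` is some concrete implementation of it; the function name `fit`, the fixed parameter index, and the tolerances are illustrative only, and use no calls beyond those exercised above.

#include "NewtonMinimizerGradHessian.h"
#include "FunctionGradHessian.h"

#include <Eigen/Dense>

// hypothetical driver: `chisq` is any concrete FunctionGradHessian
void fit(FitNewton::FunctionGradHessian& chisq, Eigen::VectorXd& best)
{
  FitNewton::NewtonMinimizerGradHessian minimizer;
  minimizer.setFunction(&chisq);
  minimizer.fixParameter(0);  // optional: hold parameter 0 at its starting value

  Eigen::VectorXd start = Eigen::VectorXd::Zero(chisq.nPars());
  best = Eigen::VectorXd::Zero(chisq.nPars());

  // arguments: relative tolerance, maximum Newton iterations, absolute tolerance
  bool converged = minimizer.minimize(start, best, 1.0e-8, 256, 1.0e-12);
  if (!converged)
  {
    // minimize() still writes its last iterate into `best` on failure
  }
}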