00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00048 #ifndef SCYTHE_OPTIMIZE_H
00049 #define SCYTHE_OPTIMIZE_H
00050
00051 #ifdef SCYTHE_COMPILE_DIRECT
00052 #include "matrix.h"
00053 #include "algorithm.h"
00054 #include "error.h"
00055 #include "rng.h"
00056 #include "distributions.h"
00057 #include "la.h"
00058 #include "ide.h"
00059 #include "smath.h"
00060 #include "stat.h"
00061 #else
00062 #include "scythestat/matrix.h"
00063 #include "scythestat/algorithm.h"
00064 #include "scythestat/error.h"
00065 #include "scythestat/rng.h"
00066 #include "scythestat/distributions.h"
00067 #include "scythestat/la.h"
00068 #include "scythestat/ide.h"
00069 #include "scythestat/smath.h"
00070 #include "scythestat/stat.h"
00071 #endif
00072
00073
00074
00075
00076
00077
00078 #ifdef __MINGW32__
00079 #define SCYTHE_MINGW32_STATIC static
00080 #else
00081 #define SCYTHE_MINGW32_STATIC
00082 #endif
00083
00084 namespace scythe {
00085 #ifndef __MINGW32__
00086 namespace {
00087 #endif
00088
00089
00090 template <typename T, matrix_order O, matrix_style S>
00091 SCYTHE_MINGW32_STATIC T donothing (const Matrix<T,O,S>& x)
00092 {
00093 return (T) 0.0;
00094 }
00095
00096 template <typename T>
00097 SCYTHE_MINGW32_STATIC T donothing (T& x)
00098 {
00099 return (T) 0.0;
00100 }
00101 #ifndef __MINGW32__
00102 }
00103 #endif
00104
00105
00106
00107
00108
00109
00116 template <typename T>
00117 T
00118 epsilon()
00119 {
00120 T eps, del, neweps;
00121 del = (T) 0.5;
00122 eps = (T) 0.0;
00123 neweps = (T) 1.0;
00124
00125 while ( del > 0 ) {
00126 if ( 1 + neweps > 1 ) {
00127 eps = neweps;
00128 neweps -= del;
00129 } else {
00130 neweps += del;
00131 }
00132 del *= 0.5;
00133 }
00134
00135 return eps;
00136 }
00137
00163 template <typename T, typename FUNCTOR>
00164 T intsimp (FUNCTOR fun, T a, T b, unsigned int N)
00165 {
00166 SCYTHE_CHECK_10(a > b, scythe_invalid_arg,
00167 "Lower limit larger than upper");
00168
00169 T I = (T) 0;
00170 T w = (b - a) / N;
00171 for (unsigned int i = 1; i <= N; ++i)
00172 I += w * (fun(a +(i - 1) *w) + 4 * fun(a - w / 2 + i * w) +
00173 fun(a + i * w)) / 6;
00174
00175 return I;
00176 }
00177
00206 template <typename T, typename FUNCTOR>
00207 T adaptsimp(FUNCTOR fun, T a, T b, unsigned int N, double tol = 1e-5)
00208 {
00209 SCYTHE_CHECK_10(a > b, scythe_invalid_arg,
00210 "Lower limit larger than upper");
00211
00212 T I = intsimp(fun, a, b, N);
00213 if (std::fabs(I - intsimp(fun, a, b, N / 2)) > tol)
00214 return adaptsimp(fun, a, (a + b) / 2, N, tol)
00215 + adaptsimp(fun, (a + b) / 2, b, N, tol);
00216
00217 return I;
00218 }
00219
00247 template <matrix_order RO, matrix_style RS, typename T,
00248 matrix_order PO, matrix_style PS, typename FUNCTOR>
00249 Matrix<T, RO, RS>
00250 gradfdif (FUNCTOR fun, const Matrix<T,PO,PS>& theta)
00251
00252 {
00253 SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
00254 "Theta not column vector");
00255
00256 unsigned int k = theta.size();
00257 T h = std::sqrt(epsilon<T>());
00258 h = std::sqrt(h);
00259
00260 Matrix<T,RO,RS> grad(k, 1);
00261 Matrix<T,RO> e;
00262 Matrix<T,RO> temp;
00263 for (unsigned int i = 0; i < k; ++i) {
00264 e = Matrix<T,RO>(k, 1);
00265 e[i] = h;
00266 temp = theta + e;
00267 donothing(temp);
00268 e = temp - theta;
00269 grad[i] = (fun(theta + e) - fun(theta)) / e[i];
00270 }
00271
00272 return grad;
00273 }
00274
00275
00276 template <typename T, matrix_order O, matrix_style S,
00277 typename FUNCTOR>
00278 Matrix<T, O, Concrete>
00279 gradfdif (FUNCTOR fun, const Matrix<T,O,S>& theta)
00280 {
00281 return gradfdif<O,Concrete>(fun, theta);
00282 }
00283
00314 template <typename T, matrix_order PO1, matrix_style PS1,
00315 matrix_order PO2, matrix_style PS2, typename FUNCTOR>
00316 T
00317 gradfdifls (FUNCTOR fun, T alpha, const Matrix<T,PO1,PS1>& theta,
00318 const Matrix<T,PO2,PS2>& p)
00319
00320 {
00321 SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
00322 "Theta not column vector");
00323 SCYTHE_CHECK_10(! p.isColVector(), scythe_dimension_error,
00324 "p not column vector");
00325
00326 unsigned int k = theta.size();
00327 T h = std::sqrt(epsilon<T>());
00328 h = std::sqrt(h);
00329
00330
00331 T deriv;
00332
00333 for (unsigned int i = 0; i < k; ++i) {
00334 T temp = alpha + h;
00335 donothing(temp);
00336 T e = temp - alpha;
00337 deriv = (fun(theta + (alpha + e) * p) - fun(theta + alpha * p))
00338 / e;
00339 }
00340
00341 return deriv;
00342 }
00343
00370 template <matrix_order RO, matrix_style RS, typename T,
00371 matrix_order PO, matrix_style PS, typename FUNCTOR>
00372 Matrix<T,RO,RS>
00373 jacfdif (FUNCTOR fun, const Matrix<T,PO,PS>& theta)
00374 {
00375 SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
00376 "Theta not column vector");
00377
00378 Matrix<T,RO> fval = fun(theta);
00379 unsigned int k = theta.rows();
00380 unsigned int n = fval.rows();
00381
00382 T h = std::sqrt(epsilon<T>());
00383 h = std::sqrt(h);
00384
00385 Matrix<T,RO,RS> J(n,k);
00386 Matrix<T,RO> e;
00387 Matrix<T,RO> temp;
00388 Matrix<T,RO> fthetae;
00389 Matrix<T,RO> ftheta;
00390
00391 for (int i = 0; i < k; ++i) {
00392 e = Matrix<T,RO>(k,1);
00393 e[i] = h;
00394 temp = theta + e;
00395 donothing(temp);
00396 e = temp - theta;
00397 fthetae = fun(theta + e);
00398 ftheta = fun(theta);
00399 for (unsigned int j = 0; j < n; ++j) {
00400 J(j,i) = (fthetae[j] - ftheta[j]) / e[i];
00401 }
00402 }
00403
00404 return J;
00405 }
00406
00407
00408 template <typename T, matrix_order PO, matrix_style PS,
00409 typename FUNCTOR>
00410 Matrix<T,PO,PS>
00411 jacfdif (FUNCTOR fun, const Matrix<T,PO,PS>& theta)
00412 {
00413 return jacfdif<PO,Concrete>(fun, theta);
00414 }
00415
00416
00443 template <matrix_order RO, matrix_style RS, typename T,
00444 matrix_order PO, matrix_style PS, typename FUNCTOR>
00445 Matrix<T, RO, RS>
00446 hesscdif (FUNCTOR fun, const Matrix<T,PO,PS>& theta)
00447 {
00448 SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
00449 "Theta not column vector");
00450
00451 T fval = fun(theta);
00452
00453
00454
00455
00456
00457 unsigned int k = theta.rows();
00458
00459
00460 T h2 = std::sqrt(epsilon<T>());
00461
00462 T h = std::sqrt(h2);
00463
00464 Matrix<T, RO, RS> H(k,k);
00465
00466
00467
00468 Matrix<T,RO> ei;
00469 Matrix<T,RO> ej;
00470 Matrix<T,RO> temp;
00471
00472 for (unsigned int i = 0; i < k; ++i) {
00473 ei = Matrix<T,RO>(k, 1);
00474 ei[i] = h;
00475 temp = theta + ei;
00476 donothing(temp);
00477 ei = temp - theta;
00478 for (unsigned int j = 0; j < k; ++j){
00479 ej = Matrix<T,RO>(k,1);
00480 ej[j] = h;
00481 temp = theta + ej;
00482 donothing(temp);
00483 ej = temp - theta;
00484
00485 if (i == j) {
00486 H(i,i) = ( -fun(theta + 2.0 * ei) + 16.0 *
00487 fun(theta + ei) - 30.0 * fval + 16.0 *
00488 fun(theta - ei) -
00489 fun(theta - 2.0 * ei)) / (12.0 * h2);
00490 } else {
00491 H(i,j) = ( fun(theta + ei + ej) - fun(theta + ei - ej)
00492 - fun(theta - ei + ej) + fun(theta - ei - ej))
00493 / (4.0 * h2);
00494 }
00495 }
00496 }
00497
00498
00499 return H;
00500 }
00501
00502
00503 template <typename T, matrix_order PO, matrix_style PS,
00504 typename FUNCTOR>
00505 Matrix<T,PO,PS>
00506 hesscdif (FUNCTOR fun, const Matrix<T,PO,PS>& theta)
00507 {
00508 return hesscdif<PO,Concrete>(fun, theta);
00509 }
00510
00539 template <typename T, matrix_order PO1, matrix_style PS1,
00540 matrix_order PO2, matrix_style PS2, typename FUNCTOR>
00541 T linesearch1 (FUNCTOR fun, const Matrix<T,PO1,PS1>& theta,
00542 const Matrix<T,PO2,PS2>& p)
00543 {
00544 SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
00545 "Theta not column vector");
00546 SCYTHE_CHECK_10(! p.isColVector(), scythe_dimension_error,
00547 "p not column vector");
00548
00549 T alpha_bar = (T) 1.0;
00550 T rho = (T) 0.9;
00551 T c = (T) 0.5;
00552 T alpha = alpha_bar;
00553 Matrix<T,PO1> fgrad = gradfdif(fun, theta);
00554
00555 while (fun(theta + alpha * p) >
00556 (fun(theta) + c * alpha * t(fgrad) * p)[0]) {
00557 alpha = rho * alpha;
00558 }
00559
00560 return alpha;
00561 }
00562
00594 template <typename T, matrix_order PO1, matrix_style PS1,
00595 matrix_order PO2, matrix_style PS2, typename FUNCTOR,
00596 typename RNGTYPE>
00597 T linesearch2 (FUNCTOR fun, const Matrix<T,PO1,PS1>& theta,
00598 const Matrix<T,PO2,PS2>& p, rng<RNGTYPE>& runif)
00599 {
00600 SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
00601 "Theta not column vector");
00602 SCYTHE_CHECK_10(! p.isColVector(), scythe_dimension_error,
00603 "p not column vector");
00604
00605 T alpha_last = (T) 0.0;
00606 T alpha_cur = (T) 1.0;
00607 T alpha_max = (T) 10.0;
00608 T c1 = (T) 1e-4;
00609 T c2 = (T) 0.5;
00610 unsigned int max_iter = 50;
00611 T fgradalpha0 = gradfdifls(fun, (T) 0, theta, p);
00612
00613 for (unsigned int i = 0; i < max_iter; ++i) {
00614 T phi_cur = fun(theta + alpha_cur * p);
00615 T phi_last = fun(theta + alpha_last * p);
00616
00617 if ((phi_cur > (fun(theta) + c1 * alpha_cur * fgradalpha0))
00618 || ((phi_cur >= phi_last) && (i > 0))) {
00619 T alphastar = zoom(fun, alpha_last, alpha_cur, theta, p);
00620 return alphastar;
00621 }
00622
00623 T fgradalpha_cur = gradfdifls(fun, alpha_cur, theta, p);
00624 if (std::fabs(fgradalpha_cur) <= -1 * c2 * fgradalpha0)
00625 return alpha_cur;
00626
00627 if ( fgradalpha_cur >= (T) 0.0) {
00628 T alphastar = zoom(fun, alpha_cur, alpha_last, theta, p);
00629 return alphastar;
00630 }
00631
00632 alpha_last = alpha_cur;
00633
00634
00635 alpha_cur = runif() * (alpha_max - alpha_cur) + alpha_cur;
00636 }
00637
00638 return 0.001;
00639 }
00640
00669 template <typename T, matrix_order PO1, matrix_style PS1,
00670 matrix_order PO2, matrix_style PS2, typename FUNCTOR>
00671 T zoom (FUNCTOR fun, T alpha_lo, T alpha_hi,
00672 const Matrix<T,PO1,PS1>& theta, const Matrix<T,PO2,PS2>& p)
00673 {
00674 SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
00675 "Theta not column vector");
00676 SCYTHE_CHECK_10(! p.isColVector(), scythe_dimension_error,
00677 "p not column vector");
00678
00679 T alpha_j = (alpha_lo + alpha_hi) / 2.0;
00680 T phi_0 = fun(theta);
00681 T c1 = (T) 1e-4;
00682 T c2 = (T) 0.5;
00683 T fgrad0 = gradfdifls(fun, (T) 0, theta, p);
00684
00685 unsigned int count = 0;
00686 unsigned int maxit = 20;
00687 while(count < maxit) {
00688 T phi_j = fun(theta + alpha_j * p);
00689 T phi_lo = fun(theta + alpha_lo * p);
00690
00691 if ((phi_j > (phi_0 + c1 * alpha_j * fgrad0))
00692 || (phi_j >= phi_lo)){
00693 alpha_hi = alpha_j;
00694 } else {
00695 T fgradj = gradfdifls(fun, alpha_j, theta, p);
00696 if (std::fabs(fgradj) <= -1 * c2 * fgrad0){
00697 return alpha_j;
00698 }
00699 if ( fgradj * (alpha_hi - alpha_lo) >= 0){
00700 alpha_hi = alpha_lo;
00701 }
00702 alpha_lo = alpha_j;
00703 }
00704 ++count;
00705 }
00706
00707 return alpha_j;
00708 }
00709
00710
00744
00745
00746 template <matrix_order RO, matrix_style RS, typename T,
00747 matrix_order PO, matrix_style PS,
00748 typename FUNCTOR, typename RNGTYPE>
00749 Matrix<T,RO,RS>
00750 BFGS (FUNCTOR fun, const Matrix<T,PO,PS>& theta, rng<RNGTYPE>& runif,
00751 unsigned int maxit, T tolerance, bool trace = false)
00752 {
00753 SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
00754 "Theta not column vector");
00755
00756 unsigned int n = theta.size();
00757
00758
00759 Matrix<T,RO> H = inv(hesscdif(fun, theta));
00760
00761
00762 Matrix<T,RO> fgrad = gradfdif(fun, theta);
00763 Matrix<T,RO> thetamin = theta;
00764 Matrix<T,RO> fgrad_new = fgrad;
00765 Matrix<T,RO> I = eye<T,RO>(n);
00766 Matrix<T,RO> s;
00767 Matrix<T,RO> y;
00768
00769 unsigned int count = 0;
00770 while( (t(fgrad_new)*fgrad_new)[0] > tolerance) {
00771 Matrix<T> p = -1.0 * H * fgrad;
00772
00773
00774
00775 T alpha = linesearch2(fun, thetamin, p, runif);
00776
00777
00778
00779
00780
00781 Matrix<T> thetamin_new = thetamin + alpha * p;
00782 fgrad_new = gradfdif(fun, thetamin_new);
00783 s = thetamin_new - thetamin;
00784 y = fgrad_new - fgrad;
00785 T rho = 1.0 / (t(y) * s)[0];
00786 H = (I - rho * s * t(y)) * H *(I - rho * y * t(s))
00787 + rho * s * t(s);
00788
00789 thetamin = thetamin_new;
00790 fgrad = fgrad_new;
00791 ++count;
00792
00793 if (trace){
00794 std::cout << "BFGS iteration = " << count << std::endl;
00795 std::cout << "thetamin = " << (t(thetamin)) ;
00796 std::cout << "gradient = " << (t(fgrad)) ;
00797 std::cout << "t(gradient) * gradient = " <<
00798 (t(fgrad) * fgrad) ;
00799 std::cout << "function value = " << fun(thetamin) <<
00800 std::endl << std::endl;
00801 }
00802
00803
00804
00805
00806
00807
00808 SCYTHE_CHECK(count > maxit, scythe_convergence_error,
00809 "Failed to converge. Try better starting values");
00810 }
00811
00812 return thetamin;
00813 }
00814
00815
00816 template <typename T, matrix_order O, matrix_style S,
00817 typename FUNCTOR, typename RNGTYPE>
00818 Matrix<T,O,Concrete>
00819 BFGS (FUNCTOR fun, const Matrix<T,O,S>& theta, rng<RNGTYPE>& runif,
00820 unsigned int maxit, T tolerance, bool trace = false)
00821 {
00822 return BFGS<O,Concrete> (fun, theta, runif, maxit, tolerance, trace);
00823 }
00824
00825
00826
00827
00828
00829
00830
00854 template <matrix_order RO, matrix_style RS, typename T,
00855 matrix_order PO, matrix_style PS, typename FUNCTOR>
00856 Matrix<T,RO,RS>
00857 nls_broyden(FUNCTOR fun, const Matrix<T,PO,PS>& theta,
00858 unsigned int maxit = 5000, T tolerance = 1e-6)
00859 {
00860 SCYTHE_CHECK_10(! theta.isColVector(), scythe_dimension_error,
00861 "Theta not column vector");
00862
00863
00864 Matrix<T,RO> thetastar = theta;
00865 Matrix<T,RO> B = jacfdif(fun, thetastar);
00866
00867 Matrix<T,RO> fthetastar;
00868 Matrix<T,RO> p;
00869 Matrix<T,RO> thetastar_new;
00870 Matrix<T,RO> fthetastar_new;
00871 Matrix<T,RO> s;
00872 Matrix<T,RO> y;
00873
00874 for (unsigned int i = 0; i < maxit; ++i) {
00875 fthetastar = fun(thetastar);
00876 p = lu_solve(B, -1 * fthetastar);
00877 T alpha = (T) 1.0;
00878 thetastar_new = thetastar + alpha*p;
00879 fthetastar_new = fun(thetastar_new);
00880 s = thetastar_new - thetastar;
00881 y = fthetastar_new - fthetastar;
00882 B = B + ((y - B * s) * t(s)) / (t(s) * s);
00883 thetastar = thetastar_new;
00884 if (max(fabs(fthetastar_new)) < tolerance)
00885 return thetastar;
00886 }
00887
00888 SCYTHE_THROW_10(scythe_convergence_error, "Failed to converge. Try better starting values or increase maxit");
00889
00890 return thetastar;
00891 }
00892
00893
00894 template <typename T, matrix_order O, matrix_style S,
00895 typename FUNCTOR>
00896 Matrix<T,O,Concrete>
00897 nls_broyden (FUNCTOR fun, const Matrix<T,O,S>& theta,
00898 unsigned int maxit = 5000, T tolerance = 1e-6)
00899 {
00900 return nls_broyden<O,Concrete>(fun, theta, maxit, tolerance);
00901 }
00902
00903 }
00904
00905 #endif