function [llks, AcP, BcP, AX, BX, kinfQ, K0P_cP, K1P_cP, sigma_e_n, sigma_e_r, K0Q_cP, K1Q_cP, rho0_cP, rho1_cP, cP, llkP, llkQ,  K0Q_X, K1Q_X, rho0_X_r, rho1_X_r, Sigma_cP, ... 
rho0_cP_r, rho1_cP_r] = ...
        sample_estimation_fun_r_survey_two_step_all(yields_all, W, yields_std,yields_non, mats_std,mats_nonstd, dt, VERBOSE, s_infl, s_infl_hor, s_infl_std, s_inflq, s_infl_q_hor, s_infl_q_std,s_inflLR, s_infl_LR_hor, s_infl_LR_std, s_cash, s_cash_hor,s_cash_std, s_10year, s_10year_hor, Boot, N_n, N_r)
%
% Estimates the model from Hambur and Finlay (2018) with the following setup:
%  1. For a number of candidate lamQ, calculate the optimal rho_X_r
%     (conditional on VAR estimates of the P-dynamics).  The aim is to pin
%     down sensible starting magnitudes for rho_X_r, conditional on
%     different lamQ.
%   1a. lamQ is assumed to be real, parameterized by the difference to maintain order.
%   1b. Take randomized lamQ and rho_X_r as initial seeds (see the stage-1
%       seed loop below for the scaling factors used).
%   1c. Sigma_cP parameterized by cholesky factorization in optimization
%   1d. always use OLS estimate of Sigma_cP to start (see Joslin, Singleton, Le)
%   1e. (kinfQ, sigma_e, K0P, K1P) and real equivalent are all concentrated out of the likelihood function.  See JSZ and JLS.
%  2. For each of the set of n starting values, run fmincon and repeat 3 times.  Repeating re-sets the iteratively computed Hessian.
%  3. Store the optimisation run that gives the best likelihood.
%
% Adapted from JSZ
%
% INPUTS:
% yields_all : T*J,  yields used to build the pricing factors (cP = yields_all*W')
% W          : N*J,  weights for the yield portfolios measured without error
% dt         : scalar, time in years for each period
% VERBOSE    : boolean, true (default when missing/empty) prints progress output
% yields_std, yields_non, mats_std, mats_nonstd,
% s_infl, s_infl_hor, s_infl_std, s_inflq, s_infl_q_hor, s_infl_q_std,
% s_inflLR, s_infl_LR_hor, s_infl_LR_std, s_cash, s_cash_hor, s_cash_std,
% s_10year, s_10year_hor, N_n, N_r :
%              passed through unchanged to jszLLK_kinf_conc_r_sur1.
%              NOTE(review): presumably the nominal/real yield panels and
%              maturities, survey series with their horizons and standard
%              deviations, and factor counts -- confirm against
%              jszLLK_kinf_conc_r_sur1.
% Boot       : not used in this function (kept for interface compatibility)
%
% OUTPUTS:
% K0Q_X      : N*1,      normalized latent-model matrix 
% K1Q_X      : N*N,      normalized latent-model matrix 
% Sigma_cP   : N*N,      positive definite matrix that is the covariance of innovations to cP
% K0P_cP     : N*1,      P-dynamics standard representation
% K1P_cP     : N*N,      P-dynamics standard representation
% K0Q_cP     : N*1       Q-dynamics standard representation
% K1Q_cP     : N*N       Q-dynamics standard representation
% sigma_e(_r): scalar    standard error of nominal (real) yield observation errors (errors are i.i.d)
%
% rho0_cP    : scalar    nominal short rate coefficient
% rho1_cP    : N*1       nominal short rate coefficient
% rho0_cP_r  : scalar    Real short rate coefficient
% rho1_cP_r  : N*1       Real short rate coefficient
%
% llks       : T*1       Likelihood
% llkP       : T*1       Likelihood of surveys and P-dynamics of factors
% llkQ       : T*1       Likelihood from fitted yields
% AcP        : 1*J       yt = AcP' + BcP'*cPt  (yt is J*1 vector)
% BcP        : N*J       AcP, BcP satisfy internal consistency condition that AcP*W' = 0, BcP*W' = I_N
% AX         : 1*J       yt = AX' + BX'*Xt  
% BX         : N*J       Xt is the 'jordan-normalized' latent state
% kinfQ      : scalar    Risk-neutral average rate
% cP         : T*N       pricing factors, yields_all*W'
%
%
% The model takes the form:
%   r(t) = rho0_cP + rho1_cP'*cPt
%        = rinfQ + 1'*Xt  (Xt is the 'jordan-normalized' state)
%        = 1 period discount rate (annualized)
%
%   pi(t) = rho0_cP_r + rho1_cP_r'*cPt
%        = rho0_X_r + rho1_X_r'*Xt  (Xt is the 'jordan-normalized' state)
%
% Under Q:
%   X(t+1) - X(t)   = K0Q_X  + K1Q_X*X(t)  + eps_X(t+1),   cov(eps_X(t+1)) = Sigma_X
%   cP(t+1) - cP(t) = K0Q_cP + K1Q_cP*cP(t) + eps_cP(t+1), cov(eps_cP(t+1)) = Sigma_cP
%   where Sigma_X is chosen to match Sigma_cP 
% and K0Q_X(m1) = kinfQ where m1 is the multiplicity of the highest eigenvalue (typically 1)
%
% Under P:
%   cP(t+1) - cP(t) = K0P_cP + K1P_cP*cP(t) + eps_cP(t+1),  cov(eps_cP(t+1)) = Sigma_cP 
%
% Model yields are given by:
%   yt^m = AcP' + BcP'*cPt  (J*1)
% And observed yields are given by:
%  yt^o = yt^m + epsilon_e(t)
% where V*epsilon_e~N(0,sigma_e^2 I_(J-N))
% and V is an (J-N)*J matrix which projects onto the span orthogonal to the
% row span of W.  This means errors are orthogonal to cPt and cPt^o = cPt^m.
%

%%

if ~exist('VERBOSE','var') || isempty(VERBOSE), VERBOSE = true; end

% Silence warnings only while this function runs: remember the caller's
% warning state and restore it on exit (normal return or error).
warnState = warning;
warning('off', 'all');
restoreWarnings = onCleanup(@() warning(warnState)); %#ok<NASGU>

nSeeds = 500;  % Number of random starting points.  We want to avoid really bad starting values.
mlam = .95;   % most negative eigenvalue is greater than -mlam
nRepeats = 3; % We run fmincon this many times in a row.  This is useful to reset the iterative computation of the Hessian

N = size(W, 1);
cP = yields_all*W'; % T*N

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% SETUP INITIAL CONDITIONS:
% STARTING POINT FOR Sigma_cP:
%    Always initialize Sigma_cP at the VAR estimate.  This should be accurate, see Joslin, Le, and Singleton.
[Gamma_hat_var_plusI, ~, Omega_hat] = regressVAR(cP);

Gamma_hat_var = Gamma_hat_var_plusI - eye(N);
gammaVec0 = Gamma_hat_var(:);   % column-stacked VAR feedback matrix
% The VAR intercept is deliberately discarded (passed on as empty) so that
% the sample average is used instead, to avoid potential small sample bias.
% This implicitly forces the factors' steady state to be the sample average.
% Otherwise, for example, the level of nominal rates tends to be shifted up
% or down relative to observed (i.e. the mean is biased relative to observed).
alpha_hat_var = [];

Sigma_cP0 = Omega_hat; 
L0 = chol(Sigma_cP0, 'lower');
inds = find(tril(ones(N)));
cholSigma_cP0 = L0(inds); % Packed lower-triangular Cholesky factor of the covariance matrix

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Layout of the stacked parameter vector used in the second stage:
%   [dlamQ (N); cholSigma_cP (N*(N+1)/2); rho0_X_r (1); rho1_X_r (N); vec(Gamma) (N^2)]
nChol   = N*(N+1)/2;
idxLam  = 1:N;
idxChol = N + (1:nChol);
idxRho0 = N + nChol + 1;
idxRho1 = N + nChol + 1 + (1:N);
idxGam  = 2*N + nChol + 1 + (1:N^2);
nParams = 2*N + nChol + 1 + N^2;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Setup the likelihood function.
%   Parameterize eigenvalues in terms of the difference to maintain order.
%   We parametrize Sigma_cP in terms of cholesky factorization.
%   llk_fun0 returns a default value for weird parameter values with numerical issues.
%
% dlamQ        : N*1
% cholSigma_cP : [N*(N+1)/2]*1 vector of lower-triangular elements of the cholesky factorization 
llk_fun = @(dlamQ, cholSigma_cP, rho0_X_r, rho1_X_r, Gamma_hat) llk_fun0(yields_all, yields_std,yields_non, W, dlamQ, cholSigma_cP, rho0_X_r, rho1_X_r, Gamma_hat, alpha_hat_var, mats_std, mats_nonstd, dt, s_infl, s_infl_hor, s_infl_std, s_inflq, s_infl_q_hor, s_infl_q_std,s_inflLR, s_infl_LR_hor, s_infl_LR_std, s_cash, s_cash_hor, s_cash_std, s_10year, s_10year_hor, N_n, N_r);

options = optimset('display','off','TolX',1e-8,'TolFun',1e-8, 'MaxIter', 5000);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% STAGE 1 - STARTING POINTS FOR lamQ AND rho_X_r:
%    Generate some random seeds of lamQ and rho_X_r. Then for each lamQ get an optimal rho_X_r.
%    This saves searching over very weird parameter sets, as these two sets
%    of parameters are closely related.
X0 = zeros(nParams, nSeeds);
for n = 1:nSeeds
    % To be sure the eigenvalues are ordered, we parameterize the difference in eigenvalues, dlamQ.
    lam1 = .01*randn;  % When this is positive we'll have Q-non-stationary model
    dlamQ = [lam1; -diff(sort([lam1; 0.1*rand(N-1,1)]))]; % Scaling factors based on what seemed to work best.
    R1_r = randn(N,1)*0.01; 
    R0_r = rand*0.01;
    x0 = [R0_r; R1_r];

    % Optimise only (rho0_X_r, rho1_X_r), holding lamQ, Sigma_cP and Gamma at their seeds.
    x = fmincon(@(Z) llk_fun(dlamQ, cholSigma_cP0, Z(1), Z(2:N+1), gammaVec0), x0,[],[],[],[],[],[],[],options);

    X0(:,n) = [dlamQ; cholSigma_cP0; x(1); x(2:N+1); gammaVec0]; % Stores all the starting seeds
    if VERBOSE
        fprintf('Stage 1: seed %d of %d done\n', n, nSeeds);
    end
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% STAGE 2 - Optimise all parameters jointly

% Linear constraint so the most negative eigenvalue (=sum(dlamQ)) is greater than -mlam
A = [-ones(1,N), zeros(1, nParams - N)];
B = mlam;
Aeq = [];
Beq = [];

% Lower bounds: first eigenvalue >= -2; diagonal of the Cholesky factor
% strictly positive (avoids a singular Sigma_cP and identifies the factor);
% everything else unbounded below.
LB = -inf(1, nParams);
LB(1) = -2;
isCholDiag = ismember(inds, find(eye(N)));
LB(idxChol(isCholDiag)) = 1e-7;

% Upper bounds: first eigenvalue <= 0.5; eigenvalue differences <= 0;
% diagonal of Gamma strictly negative; everything else unbounded above.
UB = inf(1, nParams);
UB(1) = .5;
UB(2:N) = 0;
UB(idxGam(eye(N) == 1)) = -1e-7;

cons = @nonlconE_r; % Constraint to ensure stationarity under P - could constrain to be real eigenvalues, but didn't need

objective = @(Z) llk_fun(Z(idxLam), Z(idxChol), Z(idxRho0), Z(idxRho1), Z(idxGam));

bestllk = 1e20; % default likelihood that a run has to beat
Xbest = [];     % stays empty if no run ever beats the default
for n = 1:nSeeds
    X = X0(:,n);
    for i = 1:nRepeats
        [X, llkseed, exitflag] = fmincon(objective, X, A,B,Aeq,Beq,LB,UB,cons,options);
        [~, ceq] = nonlconE_r(X);
        % If we ended up at an infeasible place, set to the bad likelihood.
        % any(...) keeps the test valid when ceq is empty or a vector.
        % NOTE(review): only the second output of nonlconE_r is checked,
        % as in the original code -- confirm that is where the
        % stationarity violation is reported.
        if exitflag < 0 || any(ceq(:) > 0)
            llkseed = 1e20;
        end
        if llkseed < bestllk
            if VERBOSE
                fprintf('Improved seed llk to %5.5g\n',llkseed)
            end
            bestllk = llkseed;
            Xbest = X;
        end
    end
    if VERBOSE
        fprintf('Stage 2: seed %d of %d done\n', n, nSeeds);
    end
end

if isempty(Xbest)
    error('sample_estimation_fun_r_survey_two_step_all:noFeasibleOptimum', ...
          'None of the %d seeds produced a feasible optimum with a likelihood below the default value.', nSeeds);
end

% Put the optimised parameters in the model to get them back in the right objects
[~, K1Q_X, Sigma_cP, rho1_X_r, rho0_X_r, K0P_cP, K1P_cP] = llk_fun(Xbest(idxLam), Xbest(idxChol), Xbest(idxRho0), Xbest(idxRho1), Xbest(idxGam));

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Get all parameters and likelihoods
[llks, AcP, BcP, AX, BX, kinfQ, K0P_cP, K1P_cP, sigma_e_n, sigma_e_r, K0Q_cP, K1Q_cP, rho0_cP, rho1_cP, rho0_cP_r, rho1_cP_r, cP, llkP, llkQ,  K0Q_X, K1Q_X, ~, ~] = ...
     jszLLK_kinf_conc_r_sur1(yields_all,yields_std,yields_non, W, K1Q_X, Sigma_cP,rho0_X_r,rho1_X_r, K1P_cP,K0P_cP, mats_std, mats_nonstd, dt, s_infl, s_infl_hor, s_infl_std, s_inflq, s_infl_q_hor, s_infl_q_std,s_inflLR, s_infl_LR_hor, s_infl_LR_std, s_cash, s_cash_hor,s_cash_std, s_10year, s_10year_hor, N_n, N_r);

%% Function to optimise
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Likelihood function.  Very extreme parameters may have numerical
% problems since some intermediate matrices may be nearly singular.  In
% this case set the likelihood to a "bad" default value.
function [llk, K1Q_X, Sigma_cP,rho1_X_r,rho0_X_r, K0P_cP,K1P_cP] = llk_fun0(yields_all,yields_std,yields_non, W, dlamQ, cholSigma_cP, rho0_X_r, rho1_X_r, Gamma_hat,  alpha_hat, mats, mats_r, dt,s_infl, s_infl_hor, s_infl_std, s_inflq, s_infl_q_hor, s_infl_q_std,s_inflLR, s_infl_LR_hor, s_infl_LR_std, s_cash, s_cash_hor,s_cash_std, s_10year, s_10year_hor, N_n, N_r)
% Objective wrapper: unpacks the vectorised parameters into model matrices
% and evaluates jszLLK_kinf_conc_r_sur1, falling back to a large default
% value when the evaluation errors or gives an unusable result.
%
%   dlamQ        : differences of the diagonal entries of K1Q_X
%                  (K1Q_X = diag(cumsum(dlamQ)))
%   cholSigma_cP : packed lower-triangular factor L, with Sigma_cP = L*L'
%   Gamma_hat    : column-stacked K1P_cP (reshaped to N*N)
%   alpha_hat    : returned as K0P_cP without modification
%   rho0_X_r, rho1_X_r are returned exactly as supplied.
%   All remaining inputs are handed to jszLLK_kinf_conc_r_sur1 untouched.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Rebuild the matrix-valued parameters from their vector form
nFac = length(dlamQ);
K1Q_X = diag(cumsum(dlamQ));

cholFactor = zeros(nFac);
cholFactor(tril(ones(nFac)) == 1) = cholSigma_cP;  % fill lower triangle, column by column
Sigma_cP = cholFactor*cholFactor';

K0P_cP = alpha_hat;
K1P_cP = reshape(Gamma_hat, [nFac, nFac]);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Value reported whenever the likelihood cannot be evaluated sensibly
badLlk = 1e20;
try
    llk = jszLLK_kinf_conc_r_sur1(yields_all,yields_std,yields_non, W, K1Q_X, Sigma_cP, rho0_X_r, rho1_X_r,K1P_cP,K0P_cP, mats, mats_r, dt, s_infl, s_infl_hor, s_infl_std, s_inflq, s_infl_q_hor, s_infl_q_std,s_inflLR, s_infl_LR_hor, s_infl_LR_std, s_cash, s_cash_hor, s_cash_std, s_10year, s_10year_hor, N_n, N_r);
    % Accept only a real, finite value (NaN counts as non-finite)
    if ~(isreal(llk) && isfinite(llk))
        llk = badLlk;
    end
catch
    % Any failure inside the likelihood evaluation maps to the default
    llk = badLlk;
end

% if llk<-100
%     [llks, AcP, BcP, AX, BX, kinfQ, K0P_cP, K1P_cP, sigma_e, K0Q_cP, K1Q_cP, rho0_cP, rho1_cP, cP, llkP, llkQ,  K0Q_X, K1Q_X, rho0_X, rho1_X] = ...
%         jszLLK_kinf_conc(yields, W, K1Q_X, Sigma_cP, mats, dt);
% %     keyboard
% end
    