%% IntOptGlob.m
%
% This algorithm finds a local optimum given random starting points. 
%
% =========================================================
% AUTHOR AND INSTITUTION INFO:
% Written by Anthony Brassil and Gabriela Nodari
% Reserve Bank of Australia
% This version: January 2018
% =========================================================

function [CPvec,NumOpt,MinErr,SameVec,CPvecAll] = IntOptGlob(Data,Active,Vers,Iter)

% INPUTS:
% Data (adj_mtx) has dimension NxN
% Active (nodes) has dimension 1xN
% Vers is a scalar
% Iter is a scalar that determines the number of randomly drawn starting
% vectors (a local optimum is found for each starting vector).

% OUTPUT:
% CPvec produces the Nx1 core/periphery vector from optimisation with
% random starting vectors.
% MinErr is a scalar equal to the value of the error function at CPvec.
% NumOpt is a scalar equal to the number of times (out of the Iter random
% starting points) that the same minimum error function value was achieved.
% SameVec is a scalar equal to one if every time the minimum error function
% was achieved, it was achieved with the same core; zero otherwise.
% CPvecAll contains all core/periphery structures that obtain the minimum
% error score. The dimensions of this matrix are NxK, where K is the number
% of distinct error-minimising structures found.

%% Internal functions
    function out = myfun(x)
        % Error-function wrapper used by the local search.
        %
        % x is a column vector of dimension N (one entry per node). Only
        % the entries of x, and the rows/columns of Data, that belong to
        % active nodes (Active==1) are passed on to errorfn, together
        % with Vers.
        %
        % An optimum will always have more than 1 node in the core and
        % fewer than sum(Active)-1 nodes in the core, so any candidate
        % outside that range is given an error of Inf without calling
        % errorfn.
        activeIdx = find(Active==1);
        coreSize = sum(x(activeIdx));     % # of active nodes in the core
        if coreSize<=1 || coreSize>=(sum(Active)-1)
            out = Inf;
        else
            out = errorfn(x(activeIdx),Data(activeIdx,activeIdx),Vers);
        end
    end

    function out = v2(x)
        % Impose the identification restriction pcc > ppp on a
        % core/periphery vector.
        %
        % x is an Nx1 vector in which ones mark the core nodes. The link
        % density inside the core block (pcc) and inside the periphery
        % block (ppp, active non-core nodes only) is computed from Data,
        % using n^2 - n as the number of possible links among n nodes.
        % If ppp > pcc, the entries of all active nodes are flipped
        % (0 <-> 1), i.e. core and periphery are switched; otherwise x is
        % returned unchanged. Entries of inactive nodes are never altered.
        out = x;

        isActive = (Active'==1);                % Nx1 mask of active nodes
        coreIdx = find(x==1);                   % position of core nodes
        periIdx = find(Active==1 & x'==0);      % active nodes not in the core

        nCore = sum(x==1);                      % # of nodes in the core
        nPeri = sum(Active)-nCore;              % # of nodes in the periphery

        coreBlock = Data(coreIdx,coreIdx);      % core-core links
        periBlock = Data(periIdx,periIdx);      % periphery-periphery links

        % Existing links divided by possible links in each block
        coreDensity = sum(coreBlock(:))/(nCore^2 - nCore);
        periDensity = sum(periBlock(:))/(nPeri^2 - nPeri);

        if periDensity > coreDensity
            % Switch core and periphery among the active nodes only
            activeVals = x(isActive);
            out(isActive) = ones(size(activeVals,1),size(activeVals,2),'uint64') - activeVals;
        end
    end

%% Optimisation algorithm
N = size(Data,1);

if Vers~=5
    % IterMat(j,:) holds the local optimum reached from the j-th random
    % starting vector; IterVec(j) holds its error-function value.
    IterMat = zeros(Iter,N,'uint64');
    IterVec = zeros(Iter,1);
    for j = 1:Iter
        
        % Initial vector has random density and random placement of links.
        % Inactive nodes are always placed in the periphery (this speeds up the
        % algorithm and reduces the curse of dimensionality but has no impact
        % on the global optimum).
        % NOTE(review): mrandi is defined outside this file; it is assumed
        % to return a sum(Active)x1 vector of zeros and ones - confirm.
        xtmp = zeros(N,1,'uint64');
        tmp5 = mrandi(sum(Active(:)),1,rand);
        count2 = 0;
        for i = 1:N
            if Active(i)==1
                count2 = count2 + 1;
                xtmp(i,1) = tmp5(count2,1);
            end
        end
        clear tmp5
        
        % Greedy local search: repeatedly flip the single node that gives
        % the lowest error, and stop once no flip lowers the error.
        % tmp3 is the error of the current vector (Inf before the first
        % move, so the first move is accepted whenever it has finite
        % error); tmp2 is the error of the proposed move.
        tmp3 = Inf;
        tmp2 = 0;
        while tmp2<tmp3
            tmp = zeros(N,1);
            
            % Determine gain from each possible change. Inactive nodes
            % are never moved, so their entry is set to Inf.
            for i = 1:N
                if Active(i) == 1
                    xtmp2 = xtmp;
                    if xtmp(i,1) == 1
                        xtmp2(i,1) = 0;
                        tmp(i,1) = myfun(xtmp2);
                    else
                        xtmp2(i,1) = 1;
                        tmp(i,1) = myfun(xtmp2);
                    end
                else
                    tmp(i,1) = Inf;
                end
            end
            
            % Determine biggest gain (i.e. lowest error); ties are broken
            % at random.
            tmp4 = find(tmp==min(tmp));
            if length(tmp4) > 1
                tmp5 = randperm(length(tmp4));
                adj = tmp4(tmp5(1));
            else
                adj = tmp4;
            end
            
            % Make adjustment (only if there is any gain)
            xtmp2 = xtmp;
            if xtmp(adj,1) == 1
                xtmp2(adj,1) = 0;
                tmp2 = myfun(xtmp2);
            else
                xtmp2(adj,1) = 1;
                tmp2 = myfun(xtmp2);
            end
            if tmp2 < tmp3
                % Accept the move; tmp2 = -Inf keeps the while loop going.
                xtmp = xtmp2;
                tmp3 = tmp2;
                tmp2 = -Inf;
            else
                % No improvement: tmp2 = tmp3 terminates the while loop.
                tmp2 = tmp3;
            end
        end
        IterVec(j,1) = tmp3;
        IterMat(j,:) = xtmp';
    end
    clear tmp2 tmp3 tmp4 tmp5 tmp
    
    % Lowest error over all starting vectors, and how often it was reached
    MinErr = min(IterVec);
    tmp4 = find(IterVec==MinErr);
    NumOpt = length(tmp4);
    
    % If more than one vector with the same minimum error, choose
    % core/periphery split randomly.
    if length(tmp4) > 1
        tmp6 = randperm(length(tmp4));
        CPvec = IterMat(tmp4(tmp6(1)),:)';
    else
        CPvec = IterMat(tmp4,:)';
    end
    clear tmp4 tmp6
    
    % Determine whether the minimum error is always produced by the same
    % core/periphery split (every column of tmp5 must be constant).
    tmp5=IterMat(IterVec==MinErr,:);
    SameVec = 1;
    for i = 1:N
        if min(tmp5(:,i)) ~= max(tmp5(:,i))
            SameVec = 0;
        end
    end
    
    % Store all error-minimising core/periphery splits (one per column)
    CPvecAll = unique(tmp5,'rows')';
    
    % For the maximum likelihood estimator, the same minimum error is
    % achieved by switching the core and periphery. This part of the code
    % imposes the identification restriction pcc>ppp by switching the core
    % and periphery when the above inequality does not hold.
    if Vers == 2
        CPvec = v2(CPvec);
        
        for i = 1:size(CPvecAll,2)
            CPvecAll(:,i) = v2(CPvecAll(:,i));
        end
        CPvecAll = unique(CPvecAll','rows')';
        
        % SameVec must be adjusted to account for the identification
        % restriction (i.e. if the only reason SameVec=0 is because the
        % core and periphery were switched, then SameVec should be 1).
        % Each minimising vector must therefore be either identical to
        % the first one (distance 0) or its exact complement over the
        % active nodes (distance sum(Active)). Counting only the exact
        % complements would wrongly leave SameVec=0 whenever the
        % minimising vectors are a mix of a split and its switched
        % version (e.g. A, A, not-A).
        if size(tmp5,1)>1
            tmp5 = double(tmp5);   % avoid saturating uint64 subtraction
            numActive = sum(Active);
            count = 0;
            for j = 2:size(tmp5,1)
                rowDist = sum(abs(tmp5(1,:)-tmp5(j,:)),2);
                if rowDist==0 || rowDist==numActive
                    count = count + 1;
                end
            end
            if count == (size(tmp5,1)-1)
                SameVec = 1;
            end
        end
    end
else
    % Version 5 is the Cucuringu et al (2016) estimator used in Appendix C. 
    % This part of the code implements this estimator (see Appendix C
    % for further details).
    % Nodes are ranked by the sum of their column and row totals in Data,
    % and the candidate cores are the i highest-ranked nodes for
    % i = 2,...,N-2. Each candidate is scored with errorfn (version 3) on
    % the full Data matrix; Active is not used in this branch.
    deg1 = zeros(N,2);
    deg1(:,1) = (1:N)';
    deg1(:,2) = sum(Data,1)' + sum(Data,2);
    Deg = sortrows(deg1,-2);
    Opt = zeros(N,N-3);
    IterVec = zeros(N-3,1);
    for i = 2:N-2
        Opt(Deg(1:i,1),i-1) = 1;
        IterVec(i-1,1) = errorfn(Opt(:,i-1),Data,3);
    end
    
    % Lowest error over all candidate cores; ties are broken at random
    MinErr = min(IterVec);
    tmp4 = find(IterVec==MinErr);
    NumOpt = length(tmp4);
    if length(tmp4) > 1
        tmp6 = randperm(length(tmp4));
        CPvec = Opt(:,tmp4(tmp6(1)));
    else
        CPvec = Opt(:,tmp4);
    end
    
    % SameVec = 1 only if every error-minimising candidate is identical
    tmp5=Opt(:,IterVec==MinErr)';
    SameVec = 1;
    for i = 1:N
        if min(tmp5(:,i)) ~= max(tmp5(:,i))
            SameVec = 0;
        end
    end
    
    % Store all error-minimising core/periphery splits (one per column)
    CPvecAll = unique(tmp5,'rows')';
end
    
end