%% Load data
% Reads every spreadsheet used by the script into its own table. All files
% are looked up on the MATLAB path, so the data folder is added first.

% Build the folder path with fullfile so the separator is correct on every
% platform (a hard-coded "..\data" only resolves on Windows). The path is
% relative to the current folder.
addpath(fullfile('..', 'data'))

% LHS variables - consumers' inflation expectations
aggregate_data = readtable("subgroups_data_new.xls", 'Sheet', 'mean'); % Weighted mean
trim_data = readtable("subgroups_data_new.xls", 'Sheet', 'trimmean'); % Trimmed mean
rounds_data = readtable("rounds_only_data.xls", 'Sheet', 'weighted_mean'); % Only including rounded responses

% By subgroup
income_data = readtable("subgroups_data_new.xls", 'Sheet', 'income'); % By income group
housing_data = readtable("subgroups_data_new.xls", 'Sheet', 'housing'); % By housing status
age_data = readtable("subgroups_data_new.xls", 'Sheet', 'age_group'); % By age group

% Wage expectations
wage_exp_data = readtable("wage_exp_data.xls", 'Sheet', 'weighted_mean');

% RHS variables - RBA inflation forecasts
rba_exp_data = readtable("rba_forecasts_cpi.xlsx", 'Sheet', 'Reshaped'); % 1 ya exp.
% The latest forecast data (forecasts from the May 2024 SMP, used for
% forecasting expectations) is not published externally, so the load below
% stays disabled and rba_latest_fc is not created by this script.
%rba_latest_fc = readtable("rba_forecasts_cpi.xlsx", 'Sheet', 'latest_fc');
early_fc_data = readtable("early_fc_data.xlsx"); % Used for RBA forecasts pre-2004Q3
rba_wexp_data = readtable("rba_forecasts_wpi.xlsx", 'Sheet', 'Reshaped'); % 1 ya exp.

% RHS variables - learned variables: observed data, perceptions, etc
infl_data = readtable("infl_data.xlsx"); % Contains many different price series taken from Gertrude
housing_lending_data = readtable("housing_lending_data.xlsx"); % Housing lending rates from RBA website
col_data = readtable("col_data.xlsx"); % Cost of living index data from ABS website
perc_data = readtable("infl_perceptions_data.xls", 'Sheet', 'weighted_mean'); % Inflation perceptions
wpi_data = readtable("wpi_data.xlsx"); % Wage price index
wperc_data = readtable("wperc_data.xlsx", 'Sheet', 'data'); % Wage growth perceptions

%% Collect all the learning variables in one table called "data"

% The Gertrude series are the base table; their date column becomes the
% reference date vector that every other series is aligned to
data = infl_data;
dates = data.date;
n_dates = numel(dates);

% Extra price series derived from the existing columns
data.trimmed_gap = data.CPI - data.Trim;
data.housing_minus_rents = data.Housing - data.Rents;

% Housing lending rates: monthly -> quarterly average -> year-ended change.
% The last two monthly observations are left out before the data are
% grouped into blocks of three months.
hl_quarters = (size(housing_lending_data,1) - 2) / 3;
hl_monthly = housing_lending_data.housing_lending(1:end-2);
hl_by_quarter = reshape(hl_monthly, [3, hl_quarters])'; % one row per quarter
hl_quarterly = mean(hl_by_quarter, 2);
hl_change = hl_quarterly(5:end) - hl_quarterly(1:end-4); % difference on four quarters earlier
hl_first = find(dates == "31-Mar-1960"); % row that receives the first value
hl_column = NaN(n_dates, 1);
hl_column(hl_first:hl_first + length(hl_change) - 1) = hl_change;
data.housing_lending = hl_column;

% Cost of living index variables: stamp them with the reference dates from
% June 1999 onwards, then merge on the shared key(s)
col_first = find(dates == "30-Jun-1999");
col_data.date = dates(col_first:end);
data = outerjoin(data, col_data, 'MergeKeys', true);

% Inflation perceptions from MI survey, as quarterly averages.
% The first monthly observation is skipped; values fill every date from
% September 2000 onwards.
perc_sources = ["infp", "infp_certain"];
perc_targets = ["perc_raw_qavg", "perc_nonrounded_qavg"];
perc_quarters = (size(perc_data,1) - 1) / 3;
perc_rows = dates >= "30-Sep-2000";
for k = 1:numel(perc_sources)
    perc_monthly = perc_data.(perc_sources(k))(2:end);
    perc_quarterly = mean(reshape(perc_monthly, [3, perc_quarters])', 2);
    perc_column = NaN(n_dates, 1);
    perc_column(perc_rows) = perc_quarterly;
    data.(perc_targets(k)) = perc_column;
end

% WPI (including bonuses), from September 1997 to the end of the sample
wpi_first = find(dates == "30-Sep-1997");
wpi_column = NaN(n_dates, 1);
wpi_column(wpi_first:end) = wpi_data.dwpi_inb;
data.WPI = wpi_column;

% Wage growth perceptions, built in two pieces
wperc_column = NaN(n_dates, 1);

% Up to 2006: take every third monthly observation (May 1998 to Nov 2006)
% and place it in the matching quarter (June 1998 to December 2006)
early_src = find(wperc_data.date == "1-May-1998"):3:find(wperc_data.date == "1-Nov-2006");
early_dst = find(dates == "30-Jun-1998"):find(dates == "31-Dec-2006");
wperc_column(early_dst) = wperc_data.wperc_nozero(early_src);

% From 2007: average the three monthly observations in each quarter
late_src = find(wperc_data.date == "1-Jan-2007"):find(wperc_data.date == "1-Jun-2024");
late_dst = find(dates == "31-Mar-2007"):find(dates == "30-Jun-2024");
late_monthly = wperc_data.wperc_nozero(late_src);
late_by_quarter = reshape(late_monthly, [3, size(late_monthly,1)/3]); % one column per quarter
wperc_column(late_dst) = mean(late_by_quarter, 1);

data.wperc_nozero_qavg = wperc_column;

%% Create table for RBA expectations
% rba_exp holds one row per reference date with the RBA inflation forecast
% (inflexp) and wage forecast (wexp); dates with no forecast stay NaN.

total_rows = length(dates);

% Inflation forecasts: pre-Aug 2004 values come from the early forecast
% spreadsheet, followed by the qa4 series; placed from March 1995 onwards
is_early = early_fc_data.date < "1-Aug-2004";
cpi_forecasts = [early_fc_data.rba_exp(is_early); rba_exp_data.qa4];
cpi_first = find(dates == "31-Mar-1995");
inflexp = NaN(total_rows, 1);
inflexp(cpi_first:cpi_first + length(cpi_forecasts) - 1) = cpi_forecasts;

% Wage forecasts: qa4 series runs from September 2004 to the last date
wage_first = find(dates == "30-Sep-2004");
wexp = NaN(total_rows, 1);
wexp(wage_first:end) = rba_wexp_data.qa4;

rba_exp = table(dates, inflexp, wexp, 'VariableNames', {'date', 'inflexp', 'wexp'});

%% LHS variables - expectations data
% Builds two tables aligned to the reference dates:
%   expect_nonrounded - subgroup and aggregate series of the "nonRounded" measure
%   expect_raw        - the same for "rawExp", plus the trimmed mean,
%                       rounded-only and wage expectation series

measures = ["nonRounded", "rawExp"];

% Dates attached to the selected survey months (March 1995 to June 2024)
fc_dates = dates(find(dates == "31-Mar-1995"):find(dates == "30-Jun-2024"));

for pass = 1:numel(measures)

    keep_var = measures(pass);     % measure retained on this pass
    drop_var = measures(3 - pass); % the other measure, discarded

    % Reshape a subgroup table to one row per (year, month) and one column
    % per group, holding the retained measure
    widen = @(tbl, group_var) pivot(removevars(tbl, drop_var), ...
        'Columns', group_var, 'Rows', ["year","month"], 'DataVariable', keep_var);
    by_income = widen(income_data, "incomeg");
    by_housing = widen(housing_data, "housing");
    by_age = widen(age_data, "age_group");

    % Aggregate infl expectations: retained measure stored under the name "all"
    aggregate = removevars(aggregate_data, drop_var);
    aggregate.all = aggregate.(keep_var);
    aggregate = removevars(aggregate, keep_var);

    % Combine into one table, joining on the shared key variables
    expect = by_income;
    for part = {by_housing, by_age, aggregate}
        expect = outerjoin(expect, part{1}, 'MergeKeys', true);
    end

    if keep_var == "rawExp"
        % Extra series for the raw table only. Each is padded with six
        % trailing NaNs before being attached to the combined table.
        tail_pad = NaN(6,1);
        expect.trim = [trim_data.rawExp; tail_pad];                          % Trimmed avg measure
        expect.rounds = [rounds_data.exp_uncertain_excludes_zeros; tail_pad]; % Rounded responses only
        expect.wages = [wage_exp_data.wexp; tail_pad];                       % Wage expectations
        expect.wages_trim = [wage_exp_data.trimwexp; tail_pad];
        expect.wages_nozero = [wage_exp_data.wexp_no_zero; tail_pad];
    end

    % Select the month in each quarter corresponding to RBA forecasts:
    % Mar, June, Sep, Dec in the 1990s, then Feb, May, Aug, Nov from May 2000
    row_of = @(yr, mth) find(expect.year == yr & expect.month == mth);
    first_row = row_of(1995, 3);
    switch_row = row_of(2000, 5);
    last_row = row_of(2024, 5);
    selected = [first_row:3:switch_row, switch_row:3:last_row];
    expect = expect(selected,:);

    % Replace the (year, month) keys with the reference dates and expand to
    % the full date range
    expect.date = fc_dates;
    expect = outerjoin(data(:,1), expect, 'MergeKeys', true);
    expect = removevars(expect, {'year','month'});

    if keep_var == "nonRounded"
        expect_nonrounded = expect;
    else
        expect_raw = expect;
    end
end

%% Tidy up the workspace
% Keep only the outputs of this script and clear everything else.
% rba_latest_fc is listed as well, but it is not created above because its
% readtable call is commented out; naming a missing variable here is harmless.
keep_list = {'dates', 'data', 'rba_exp', 'expect_nonrounded', 'expect_raw', 'rba_latest_fc'};
clearvars('-except', keep_list{:})

