1 Introduction

In this code, we score two out-of-sample datasets using four models. The first dataset is a time-series text dataset with all paragraphs extracted from the RBA SMP Introduction from 1997 to 2020, and the second is a cross-sectional text dataset with paragraphs collected from various sources. The output files are saved in the data_output folder, and the results are discussed in Section 7 of the paper.

2 Preparation

2.1 Upload libraries

Load all libraries in this step. You may need to install some packages if this is your first time using them.

# Packages used by this script. The attach order is kept as in the original
# list; plyr is attached after dplyr, so same-named dplyr verbs are masked and
# are called with an explicit `dplyr::` prefix further down.
load.lib <- c("caret", "tidyr", "dplyr", "randomForest", "ggplot2", "PRROC",
              "klaR", "questionr", "plyr", "stringr", "kableExtra")

# Fail fast with a clear message when a package is not installed, instead of
# letting require() return FALSE and the script break later at first use.
missing.lib <- load.lib[!vapply(load.lib, requireNamespace, logical(1),
                                quietly = TRUE)]
if (length(missing.lib) > 0) {
  stop("Please install the missing packages: ",
       paste(missing.lib, collapse = ", "), call. = FALSE)
}
invisible(lapply(load.lib, library, character.only = TRUE))

# Pin `select` to dplyr's version so it is not shadowed by another attached
# package (presumably MASS, pulled in via klaR -- confirm).
select <- dplyr::select

2.2 Import models

Load the models that were built in ‘P3_Building_Models.Rmd’. Because the fitted models are read from disk, you can run this step without re-running the previous steps.

# Clarity models (economist / non-economist), read from the saved model files
model_noneco_clarity <- readRDS(file = "./data_input/model/para_noneco_clarity_final_model.rda")
model_eco_clarity <- readRDS(file = "./data_input/model/para_eco_clarity_final_model.rda")

# Content (reasoning) models (economist / non-economist)
model_noneco_content <- readRDS(file = "./data_input/model/para_noneco_content_final_model.rda")
model_eco_content <- readRDS(file = "./data_input/model/para_eco_content_final_model.rda")

3 Score SMP Introduction

In this section, we apply our models to score paragraphs extracted from the SMP introduction sections from 1997 to 2020. The output spreadsheet titled ‘smp_prediction_results.csv’ is saved in the r_output folder, and the results are reported and discussed in Section 7.1 of the paper.

3.1 Prepare SMP data

Import sample paragraphs with text features. A snapshot of the data is shown as:

## Read the three SMP feature tables; each file covers a different period
smp_text_feature_2020 <- readRDS(file = "./data_input/smp_2021_text_feature.rds")        # 2020 to 2021 Feb
smp_text_feature_2019 <- readRDS(file = "./data_input/smp_intro2019_text_feature.rds")   # 2019 Feb to 2019 Nov
smp_text_feature_part1 <- readRDS(file = "./data_input/smp_text_feature_final0717.rds")  # 1997 to 2019 Feb

# Bring the oldest table in line with the other two. plyr::rename() takes
# c(old_name = new_name), so e.g. start_word_pos becomes word_pos.word1.
smp_text_feature_part1 <- plyr::rename(
  smp_text_feature_part1,
  replace = c("start_word_pos" = "word_pos.word1",
              "second_word_pos" = "word_pos.word2",
              "third_word_pos" = "word_pos.word3")
)

## Stack the three tables in chronological order; rbind.fill() pads columns
## that are missing from one of the inputs with NA
smp_text_feature_data <- plyr::rbind.fill(
  smp_text_feature_part1,
  smp_text_feature_2019,
  smp_text_feature_2020
)


### Transpose word_pos.word1, word_pos.word2 and word_pos.word3.
### For each of them we build a wide table with one row per question_index and
### one column per POS tag; a cell holds the number of rows carrying that tag
### (NA where a tag does not occur for that question_index).
mydata <- smp_text_feature_data

## 1st word POS
start_word_pos <- mydata %>%
  ungroup() %>%
  select(question_index, word_pos.word1) %>%
  group_by(question_index, word_pos.word1) %>%
  dplyr::count() %>%   # explicit namespace: plyr, attached later, also exports count()
  tidyr::spread(word_pos.word1, n)
# Prefix every tag column (everything except the leading question_index key).
# `[-1]` is a safe no-op when there are no tag columns, unlike `2:ncol(x)`.
colnames(start_word_pos)[-1] <- paste0("word_1st_", colnames(start_word_pos)[-1])

## 2nd word POS
second_word_pos <- mydata %>%
  ungroup() %>%
  select(question_index, word_pos.word2) %>%
  group_by(question_index, word_pos.word2) %>%
  dplyr::count() %>%
  tidyr::spread(word_pos.word2, n)
colnames(second_word_pos)[-1] <- paste0("word_2nd_", colnames(second_word_pos)[-1])

## 3rd word POS
third_word_pos <- mydata %>%
  ungroup() %>%
  select(question_index, word_pos.word3) %>%
  group_by(question_index, word_pos.word3) %>%
  dplyr::count() %>%
  tidyr::spread(word_pos.word3, n)
colnames(third_word_pos)[-1] <- paste0("word_3rd_", colnames(third_word_pos)[-1])


## Drop the raw POS columns (and word_choose) now that they have been
## transposed, then join the three wide POS tables back by question_index.
mydata2 <- mydata %>%
  select(-word_pos.word1, -word_pos.word2, -word_pos.word3, -word_choose)

# Join onto the cleaned table (mydata2). The original joined onto `mydata`,
# which left mydata2 unused and kept the columns that were meant to be removed.
smp_text_feature_final <- mydata2 %>%
  left_join(start_word_pos, by = "question_index") %>%
  left_join(second_word_pos, by = "question_index") %>%
  left_join(third_word_pos, by = "question_index") %>%
  as.data.frame()

# Final clean-up of the joined table.
# A literal "$" in a column name is rewritten as "ds" (e.g. PRP$ -> PRPds).
colnames(smp_text_feature_final) <- gsub(
  pattern = '[$]', replacement = 'ds', x = colnames(smp_text_feature_final)
)

# Derived ratios: words per sentence and syllables per word
smp_text_feature_final[["word_per_sentence"]] <- with(
  smp_text_feature_final, word_count_stats / sentence_count
)
smp_text_feature_final[["sylls_per_word"]] <- with(
  smp_text_feature_final, readability_stats.sylls / word_count_stats
)

# Every remaining NA becomes 0
smp_text_feature_final[is.na(smp_text_feature_final)] <-0

# Rename the transposed POS columns (word_1st_* / word_2nd_* / word_3rd_*) to
# the pos_word1_* / pos_word2_* / pos_word3_* convention; the three
# substitutions are applied in the same order as before (1st, 2nd, 3rd).
colnames(smp_text_feature_final) <- gsub(
  pattern = "word_3rd_", replacement = "pos_word3_",
  x = gsub(
    pattern = "word_2nd_", replacement = "pos_word2_",
    x = gsub(
      pattern = "word_1st_", replacement = "pos_word1_",
      x = colnames(smp_text_feature_final)
    )
  )
)

# Preview the first rows of the scoring table in a scrollable box
kbl(head(smp_text_feature_final)) %>%
  kable_paper() %>%
  scroll_box(width = "100%", height = "200px")
index.x para_index year month month_no section paragraph paragraph_clean word_count_stats sentence_count readability_stats.sylls readability_stats.polys fk_grade_level FRES_score comma_count punc_count digit_count question_index index.y CC CD DT EX IN JJ JJR NN NNS RB RBR TO VB VBD VBG VBN VBP VBZ WDT MD POS PRP RP PRPds -LRB- -RRB- NNP RBS WRB JJS WPds WP PDT UH NNPS pos_prop_CC pos_prop_CD pos_prop_DT pos_prop_EX pos_prop_IN pos_prop_JJ pos_prop_JJR pos_prop_NN pos_prop_NNS pos_prop_RB pos_prop_RBR pos_prop_TO pos_prop_VB pos_prop_VBD pos_prop_VBG pos_prop_VBN pos_prop_VBP pos_prop_VBZ pos_prop_WDT pos_prop_MD pos_prop_POS pos_prop_PRP pos_prop_RP pos_prop_PRPds pos_prop_NNP pos_prop_RBS pos_prop_WRB pos_prop_JJS pos_prop_WPds pos_prop_WP pos_prop_PDT pos_prop_UH pos_prop_NNPS para_rank index sent_1st_DT sent_1st_IN sent_1st_JJ sent_1st_NN sent_1st_NNS sent_1st_TO sent_1st_VB sent_1st_VBG sent_1st_VBP sent_1st_PRP sent_1st_RB sent_1st_VBD sent_1st_VBN sent_1st_JJS sent_1st_VBZ sent_1st_CC sent_1st_RBR sent_1st_CD sent_1st_PRPds sent_1st_POS sent_1st_WDT sent_1st_WRB sent_1st_MD sent_1st_RBS sent_1st_JJR sent_1st_EX sent_1st_RP sent_1st_WP sent_1st_NNP sent_1st_-LRB- sent_1st_-RRB- sent_1st_PDT sent_1st_prop_DT sent_1st_prop_IN sent_1st_prop_JJ sent_1st_prop_NN sent_1st_prop_NNS sent_1st_prop_TO sent_1st_prop_VB sent_1st_prop_VBG sent_1st_prop_VBP sent_1st_prop_PRP sent_1st_prop_RB sent_1st_prop_VBD sent_1st_prop_VBN sent_1st_prop_JJS sent_1st_prop_VBZ sent_1st_prop_CC sent_1st_prop_RBR sent_1st_prop_CD sent_1st_prop_PRPds sent_1st_prop_POS sent_1st_prop_WDT sent_1st_prop_WRB sent_1st_prop_MD sent_1st_prop_RBS sent_1st_prop_JJR sent_1st_prop_EX sent_1st_prop_RP sent_1st_prop_WP sent_1st_prop_NNP sent_1st_prop_-LRB- sent_1st_prop_-RRB- sent_1st_prop_PDT word_pos.word1 word_pos.word2 word_pos.word3 sent1st_clue_Attitudinal sent1st_clue_connective sent1st_clue_Contrast sent1st_clue_detail sent1st_clue_emphasis sent1st_clue_inference sent1st_clue_reformulation 
sent1st_clue_summary sentlast_clue_Attitudinal sentlast_clue_connective sentlast_clue_Contrast sentlast_clue_detail sentlast_clue_emphasis sentlast_clue_inference sentlast_clue_reformulation sentlast_clue_summary sentmiddle_clue_Attitudinal sentmiddle_clue_connective sentmiddle_clue_Contrast sentmiddle_clue_detail sentmiddle_clue_emphasis sentmiddle_clue_inference sentmiddle_clue_reformulation sentmiddle_clue_summary sent_1st_word_, sent_1st_word_CC sent_1st_word_CD sent_1st_word_DT sent_1st_word_EX sent_1st_word_FW sent_1st_word_IN sent_1st_word_JJ sent_1st_word_JJR sent_1st_word_JJS sent_1st_word_MD sent_1st_word_NN sent_1st_word_NNS sent_1st_word_PDT sent_1st_word_PRP sent_1st_word_PRPds sent_1st_word_RB sent_1st_word_RBR sent_1st_word_RBS sent_1st_word_RP sent_1st_word_TO sent_1st_word_VB sent_1st_word_VBD sent_1st_word_VBG sent_1st_word_VBN sent_1st_word_VBP sent_1st_word_VBZ sent_1st_word_WDT sent_1st_word_WP sent_1st_word_WRB sent_1st_parse_ADJP sent_1st_parse_NP sent_1st_parse_PP sent_1st_parse_S sent_1st_parse_SBAR sent_1st_parse_VP sent_1st_parse_WHNP sent_1st_parse_ADVP sent_1st_parse_WHADVP sent_1st_parse_WHPP sent_1st_parse_SINV sent_1st_parse_SQ sent_1st_parse_SBARQ sent_last_parse_ADJP sent_last_parse_NP sent_last_parse_PP sent_last_parse_S sent_last_parse_SBAR sent_last_parse_VP sent_last_parse_WHNP sent_last_parse_ADVP sent_last_parse_WHADVP sent_last_parse_WHPP sent_last_parse_SINV sent_last_parse_SQ sent_last_parse_SBARQ ADJP NP PP S SBAR VP WHNP ADVP WHADVP WHPP SINV SQ SBARQ word_choose word_pos X rank sent_1st_word_NNS_POS pos_word1_CC pos_word1_CD pos_word1_DT pos_word1_EX pos_word1_IN pos_word1_JJ pos_word1_JJR pos_word1_JJS pos_word1_NN pos_word1_NNS pos_word1_PRP pos_word1_RB pos_word1_RBR pos_word1_RBS pos_word1_RP pos_word1_VB pos_word1_VBD pos_word1_VBG pos_word1_VBN pos_word1_VBP pos_word1_VBZ pos_word2_CC pos_word2_CD pos_word2_DT pos_word2_IN pos_word2_JJ pos_word2_JJR pos_word2_JJS pos_word2_MD pos_word2_NN pos_word2_NN_POS 
pos_word2_NNS pos_word2_PDT pos_word2_PRP pos_word2_PRPds pos_word2_RB pos_word2_RBR pos_word2_RBS pos_word2_RP pos_word2_SYM pos_word2_TO pos_word2_VB pos_word2_VBD pos_word2_VBG pos_word2_VBN pos_word2_VBP pos_word2_VBZ pos_word3_CC pos_word3_CD pos_word3_DT pos_word3_EX pos_word3_IN pos_word3_JJ pos_word3_JJR pos_word3_JJS pos_word3_MD pos_word3_NN pos_word3_NNP pos_word3_NNS pos_word3_PRP pos_word3_RB pos_word3_RBS pos_word3_RP pos_word3_TO pos_word3_VB pos_word3_VBD pos_word3_VBG pos_word3_VBN pos_word3_VBP pos_word3_VBZ pos_word3_WDT pos_word3_WRB word_per_sentence sylls_per_word
1 1997_5 1997 may 5 Introduction The economy moved through a period of slower growth in 1996 during which inflationary pressures eased significantly. The March quarter CPI result confirms that underlying inflation has returned to an annual rate of close to 2 per cent, and the prospects of inflation remaining low in the near future appear to be good. With some surplus capacity existing there is scope for the economy to grow more quickly in 1997 without generating significant inflationary pressures, provided growth in labour costs is not excessive. The economy moved through a period of slower growth in 1996 during which inflationary pressures eased significantly. The March quarter CPI result confirms that underlying inflation has returned to an annual rate of close to 2 per cent, and the prospects of inflation remaining low in the near future appear to be good. With some surplus capacity existing there is scope for the economy to grow more quickly in 1997 without generating significant inflationary pressures, provided growth in labour costs is not excessive. 80 3 147 19 16.49250 24.31583 2 3 9 1997_5_1 1 1 3 8 1 14 9 1 18 4 3 1 4 2 2 4 2 1 4 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1.20 3.61 9.64 1.20 16.87 10.84 1.20 21.69 4.82 3.61 1.20 4.82 2.41 2.41 4.82 2.41 1.20 4.82 1.20 0.00 0.00 0.00 0.00 0 0 0 0 0 0 0 0 0 0 1 1 2 4 1 3 1 0 0 0 0 0 1 2 0 0 0 0 0 1 0 0 1 0 0 0 1 0 0 0 0 0 0 0 11.76 23.53 5.88 17.65 5.88 0.00 0.00 0.00 0.00 0 5.88 11.76 0.00 0 0.00 0.00 0.00 5.88 0 0 5.88 0 0.00 0 5.88 0.00 0.00 0 0 0 0 0 DT NN VBD 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7 3 2 1 2 1 1 0 1 0 0 0 1 12 6 4 3 6 0 1 0 0 0 0 0 3 30 14 11 5 15 1 2 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 26.66667 1.837500
2 1997_5 1997 may 5 Introduction So far in 1997 there have already been some indications that the pace of growth is picking up, particularly in areas of construction investment, housing and consumer spending. Growth is being supported by several factors including the effect of the three policy easings in the second half of 1996, which brought cash rates down by 1½ percentage points. The impact of those cash rate reductions on home mortgage borrowers has been reinforced by a significant compression of intermediaries’ interest margins in that area. The lower interest rates now in place should be supportive of interest-sensitive areas of activity, particularly housing and non-residential construction, as well as helping household and business cash flows. Other factors favourable to growth at present include the strong US economy, moderately rising commodity prices, and an historically good level of business profitability in many industries. So far in 1997 there have already been some indications that the pace of growth is picking up, particularly in areas of construction investment, housing and consumer spending. Growth is being supported by several factors including the effect of the three policy easings in the second half of 1996, which brought cash rates down by 1½ percentage points. The impact of those cash rate reductions on home mortgage borrowers has been reinforced by a significant compression of intermediaries’ interest margins in that area. The lower interest rates now in place should be supportive of interest-sensitive areas of activity, particularly housing and non-residential construction, as well as helping household and business cash flows. Other factors favourable to growth at present include the strong US economy, moderately rising commodity prices, and an historically good level of business profitability in many industries. 
137 5 257 34 17.23177 20.32181 7 8 9 1997_5_2 2 4 4 12 1 23 12 1 35 16 10 0 1 1 1 5 4 2 3 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 2.86 2.86 8.57 0.71 16.43 8.57 0.71 25.00 11.43 7.14 0.00 0.71 0.71 0.71 3.57 2.86 1.43 2.14 0.71 0.71 0.71 0.71 0.71 0 0 0 0 0 0 0 0 0 0 2 1 2 5 0 7 2 0 0 1 1 0 4 0 1 0 1 1 0 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 7.14 17.86 0.00 25.00 7.14 0.00 0.00 3.57 3.57 0 14.29 0.00 3.57 0 3.57 3.57 0.00 3.57 0 0 0.00 0 0.00 0 0.00 3.57 3.57 0 0 0 0 0 RB RB IN 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 13 4 2 1 4 0 3 0 0 0 0 0 2 13 4 1 0 1 0 0 0 0 0 0 0 3 61 23 8 2 15 1 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 27.40000 1.875912
3 1997_5 1997 may 5 Introduction Conditions in the manufacturing sector have been an exception to this generally firmer picture, with profitability under pressure and investment intentions declining. The pressure on profitability reflects a combination of rising wage costs and flat selling prices, which continue to be constrained in many parts of the manufacturing sector by strong international competition. With the exchange rate having risen in trade-weighted terms, pressures on competitiveness have intensified during the past year. Looking ahead, however, a number of areas of domestic manufacturing are likely to benefit from the expansion in housing and non-residential construction now under way. Conditions in the manufacturing sector have been an exception to this generally firmer picture, with profitability under pressure and investment intentions declining. The pressure on profitability reflects a combination of rising wage costs and flat selling prices, which continue to be constrained in many parts of the manufacturing sector by strong international competition. With the exchange rate having risen in trade-weighted terms, pressures on competitiveness have intensified during the past year. Looking ahead, however, a number of areas of domestic manufacturing are likely to benefit from the expansion in housing and non-residential construction now under way. 
96 4 194 24 17.61583 11.51250 5 6 0 1997_5_3 3 3 0 10 0 17 9 1 25 8 4 0 3 2 0 4 4 4 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3.12 0.00 10.42 0.00 17.71 9.38 1.04 26.04 8.33 4.17 0.00 3.12 2.08 0.00 4.17 4.17 4.17 1.04 1.04 0.00 0.00 0.00 0.00 0 0 0 0 0 0 0 0 0 0 3 1 3 3 0 7 2 1 0 1 1 0 1 0 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 13.64 13.64 0.00 31.82 9.09 4.55 0.00 4.55 4.55 0 4.55 0.00 4.55 0 0.00 4.55 0.00 0.00 0 0 0.00 0 0.00 0 4.55 0.00 0.00 0 0 0 0 0 NNS IN DT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 9 4 1 0 3 0 0 0 0 0 0 0 1 12 5 3 0 4 0 3 0 0 0 0 0 1 40 18 9 1 16 1 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 24.00000 2.020833
4 1997_5 1997 may 5 Introduction Employment growth has lagged behind the overall pace of economic activity. Total employment has been growing at a rate of around 1 per cent in the past year, concentrated in part-time jobs. Nonetheless, the number of job vacancies has increased and employment can be expected to strengthen as the general pace of activity picks up over the course of the year. Employment growth has lagged behind the overall pace of economic activity. Total employment has been growing at a rate of around 1 per cent in the past year, concentrated in part-time jobs. Nonetheless, the number of job vacancies has increased and employment can be expected to strengthen as the general pace of activity picks up over the course of the year. 60 3 99 12 11.68000 46.94500 2 4 1 1997_5_4 4 1 1 7 0 13 6 0 15 2 1 0 1 2 1 1 4 0 4 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1.64 1.64 11.48 0.00 21.31 9.84 0.00 24.59 3.28 1.64 0.00 1.64 3.28 1.64 1.64 6.56 0.00 6.56 0.00 1.64 0.00 0.00 1.64 0 0 0 0 0 0 0 0 0 0 4 1 1 2 2 4 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 9.09 18.18 18.18 36.36 0.00 0.00 0.00 0.00 0.00 0 0.00 0.00 9.09 0 9.09 0.00 0.00 0.00 0 0 0.00 0 0.00 0 0.00 0.00 0.00 0 0 0 0 0 NN NN VBZ 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 4 2 1 0 2 0 0 0 0 0 0 0 0 10 5 5 0 8 0 1 0 0 0 0 0 0 21 12 7 0 15 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 20.00000 1.650000
5 1997_5 1997 may 5 Introduction The favourable near-term outlook for inflation is being underpinned by continued help from the exchange rate in holding down import prices. Also helpful to the outlook is the result of the recent Safety-Net Review by the Australian Industrial Relations Commission, which delivered only moderate increases in award wages. However, other developments in labour costs are of more concern for the longer-term inflation outlook. Wage increases under enterprise bargaining continue to be in the 4 to 5 per cent range, figures which appear high in a climate of 2 per cent inflation and 8½ per cent unemployment. Aggregate wages data, which encompass workers on award wages, enterprise agreements and other bargaining arrangements, suggest that the overall pace of wages growth has picked up recently. These figures will need to be closely watched to assess the extent to which they represent a significant change in trend but, taken at face value, they suggest that wages growth is becoming uncomfortably high. The favourable near-term outlook for inflation is being underpinned by continued help from the exchange rate in holding down import prices. Also helpful to the outlook is the result of the recent Safety-Net Review by the Australian Industrial Relations Commission, which delivered only moderate increases in award wages. However, other developments in labour costs are of more concern for the longer-term inflation outlook. Wage increases under enterprise bargaining continue to be in the 4 to 5 per cent range, figures which appear high in a climate of 2 per cent inflation and 8½ per cent unemployment. Aggregate wages data, which encompass workers on award wages, enterprise agreements and other bargaining arrangements, suggest that the overall pace of wages growth has picked up recently. 
These figures will need to be closely watched to assess the extent to which they represent a significant change in trend but, taken at face value, they suggest that wages growth is becoming uncomfortably high. 155 6 276 33 15.49661 29.97159 8 9 4 1997_5_5 5 3 4 13 0 23 17 1 36 16 6 0 6 4 1 3 4 7 5 4 1 0 2 2 0 0 0 0 0 0 0 0 0 0 0 0 1.90 2.53 8.23 0.00 14.56 10.76 0.63 22.78 10.13 3.80 0.00 3.80 2.53 0.63 1.90 2.53 4.43 3.16 2.53 0.63 0.00 1.27 1.27 0 0 0 0 0 0 0 0 0 0 5 1 2 4 3 6 1 0 0 2 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 9.52 19.05 14.29 28.57 4.76 0.00 0.00 9.52 0.00 0 0.00 0.00 4.76 0 4.76 0.00 0.00 0.00 0 0 0.00 0 0.00 0 0.00 0.00 4.76 0 0 0 0 0 DT JJ IN 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 1 0 0 0 1 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 8 4 2 0 4 0 0 0 0 0 0 0 1 10 2 6 2 14 1 1 0 1 0 0 0 3 57 20 18 6 29 4 4 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 25.83333 1.780645
6 1997_5 1997 may 5 Introduction The capacity of the economy to grow faster while maintaining low inflation will depend importantly on the future behaviour of wages. A significant acceleration of aggregate wages in response to stronger economic growth would directly curtail job creation. It would also threaten faster inflation – to which monetary policy would have to respond – and thereby put at risk the potential for faster non-inflationary growth. Lessening of wages growth would, on the other hand, enable faster growth without risk of acceleration in inflation. The capacity of the economy to grow faster while maintaining low inflation will depend importantly on the future behaviour of wages. A significant acceleration of aggregate wages in response to stronger economic growth would directly curtail job creation. It would also threaten faster inflation – to which monetary policy would have to respond – and thereby put at risk the potential for faster non-inflationary growth. Lessening of wages growth would, on the other hand, enable faster growth without risk of acceleration in inflation. 83 4 156 21 14.68081 26.76652 2 7 0 1997_5_6 6 1 0 6 0 13 10 4 21 3 5 1 4 7 0 1 0 1 0 1 5 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1.19 0.00 7.14 0.00 15.48 11.90 4.76 25.00 3.57 5.95 1.19 4.76 8.33 0.00 1.19 0.00 1.19 0.00 1.19 5.95 0.00 1.19 0.00 0 0 0 0 0 0 0 0 0 0 6 1 3 4 2 4 1 1 2 1 0 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 14.29 19.05 9.52 19.05 4.76 4.76 9.52 4.76 0.00 0 4.76 0.00 0.00 0 0.00 0.00 4.76 0.00 0 0 0.00 0 4.76 0 0.00 0.00 0.00 0 0 0 0 0 DT NN IN 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 8 3 3 2 5 0 2 0 0 0 0 0 0 11 5 1 0 2 0 0 0 0 0 0 0 0 36 13 8 3 17 1 5 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 20.75000 1.879518

3.2 Scoring using reasoning model

Score the time-series text data from the RBA SMP from 1997 to 2020 using two reasoning models: the economist reasoning model and the non-economist reasoning model. The prediction results are saved in two columns: eco_content and noneco_content. A snapshot of the output table is:

3.2.1 Reasoning results

mydata <- smp_text_feature_final

# Class probabilities from the two reasoning (content) models
probsTest_model_eco <- predict(model_eco_content, mydata, type = "prob") %>% as.data.frame()
probsTest_model_noneco <- predict(model_noneco_content, mydata, type = "prob") %>% as.data.frame()

# Both models must expose a "high" class; otherwise `$high` is NULL and
# data.frame() below would silently drop the corresponding score column.
stopifnot(
  "high" %in% names(probsTest_model_eco),
  "high" %in% names(probsTest_model_noneco)
)

# Keep the paragraph key next to the economist-model probabilities
probsTest_model_eco$question_index <- mydata$question_index

# One row per paragraph: key plus the probability of the "high" class
# under each model
predict_result <- data.frame(question_index = mydata$question_index,
                             eco_content = probsTest_model_eco$high,
                             noneco_content = probsTest_model_noneco$high)

# Attach the paragraph text and a few descriptive features. predict_result
# already carries question_index, so it is not bound a second time (the
# original added an extra column literally named
# "smp_text_feature_final$question_index").
output_data_content <- base::cbind(predict_result,
                                   paragraph = smp_text_feature_final$paragraph,
                                   fk_grade_level = smp_text_feature_final$fk_grade_level,
                                   year = smp_text_feature_final$year,
                                   month = smp_text_feature_final$month,
                                   word_count_stats = smp_text_feature_final$word_count_stats,
                                   sentence_count = smp_text_feature_final$sentence_count,
                                   FRES_score = smp_text_feature_final$FRES_score)

# write.csv(output_data_content, "smp_predict_result_content.csv") ##save results
output_data_content %>% head() %>% kbl() %>%
  kable_paper() %>%
  scroll_box(width = "100%", height = "200px")
smp_text_feature_final$question_index question_index eco_content noneco_content paragraph fk_grade_level year month word_count_stats sentence_count FRES_score
1997_5_1 1997_5_1 0.34 0.4766667 The economy moved through a period of slower growth in 1996 during which inflationary pressures eased significantly. The March quarter CPI result confirms that underlying inflation has returned to an annual rate of close to 2 per cent, and the prospects of inflation remaining low in the near future appear to be good. With some surplus capacity existing there is scope for the economy to grow more quickly in 1997 without generating significant inflationary pressures, provided growth in labour costs is not excessive. 16.49250 1997 may 80 3 24.31583
1997_5_2 1997_5_2 0.29 0.3800000 So far in 1997 there have already been some indications that the pace of growth is picking up, particularly in areas of construction investment, housing and consumer spending. Growth is being supported by several factors including the effect of the three policy easings in the second half of 1996, which brought cash rates down by 1½ percentage points. The impact of those cash rate reductions on home mortgage borrowers has been reinforced by a significant compression of intermediaries’ interest margins in that area. The lower interest rates now in place should be supportive of interest-sensitive areas of activity, particularly housing and non-residential construction, as well as helping household and business cash flows. Other factors favourable to growth at present include the strong US economy, moderately rising commodity prices, and an historically good level of business profitability in many industries. 17.23177 1997 may 137 5 20.32181
1997_5_3 1997_5_3 0.40 0.2833333 Conditions in the manufacturing sector have been an exception to this generally firmer picture, with profitability under pressure and investment intentions declining. The pressure on profitability reflects a combination of rising wage costs and flat selling prices, which continue to be constrained in many parts of the manufacturing sector by strong international competition. With the exchange rate having risen in trade-weighted terms, pressures on competitiveness have intensified during the past year. Looking ahead, however, a number of areas of domestic manufacturing are likely to benefit from the expansion in housing and non-residential construction now under way. 17.61583 1997 may 96 4 11.51250
1997_5_4 1997_5_4 0.47 0.3666667 Employment growth has lagged behind the overall pace of economic activity. Total employment has been growing at a rate of around 1 per cent in the past year, concentrated in part-time jobs. Nonetheless, the number of job vacancies has increased and employment can be expected to strengthen as the general pace of activity picks up over the course of the year. 11.68000 1997 may 60 3 46.94500
1997_5_5 1997_5_5 0.63 0.7133333 The favourable near-term outlook for inflation is being underpinned by continued help from the exchange rate in holding down import prices. Also helpful to the outlook is the result of the recent Safety-Net Review by the Australian Industrial Relations Commission, which delivered only moderate increases in award wages. However, other developments in labour costs are of more concern for the longer-term inflation outlook. Wage increases under enterprise bargaining continue to be in the 4 to 5 per cent range, figures which appear high in a climate of 2 per cent inflation and 8½ per cent unemployment. Aggregate wages data, which encompass workers on award wages, enterprise agreements and other bargaining arrangements, suggest that the overall pace of wages growth has picked up recently. These figures will need to be closely watched to assess the extent to which they represent a significant change in trend but, taken at face value, they suggest that wages growth is becoming uncomfortably high. 15.49661 1997 may 155 6 29.97159
1997_5_6 1997_5_6 0.67 0.6933333 The capacity of the economy to grow faster while maintaining low inflation will depend importantly on the future behaviour of wages. A significant acceleration of aggregate wages in response to stronger economic growth would directly curtail job creation. It would also threaten faster inflation – to which monetary policy would have to respond – and thereby put at risk the potential for faster non-inflationary growth. Lessening of wages growth would, on the other hand, enable faster growth without risk of acceleration in inflation. 14.68081 1997 may 83 4 26.76652

3.2.2 Regenerate predictions after removing top features one by one

To get an idea of whether the average effect of a particular variable is positive or negative, we rerun the models on the SMP sample paragraphs after removing each of the top five variables, one at a time (in the code below, a variable is "removed" by setting it to zero for every paragraph). Based on the difference between the new results and the original ones, we classify the partial effect of a variable as positive or negative. The results are discussed in Section 6.3 of the paper.

(1) Eco-Reasoning model

Regenerate model predictions using the economist reasoning (content) model after removing the top 5 variables one by one. After that, we save the results as one table, which is mapped back to the original prediction results (in columns eco_content and noneco_content). A snapshot of the output is:

# Top-5 variables of each model (reference lists; not referenced by the code
# in this section)
eco_content_var_list <- c("VB","pos_prop_VB","pos_prop_NN","digit_count","pos_prop_MD")
# NOTE(review): "pos_prop_VB" appears twice in the next list -- confirm the
# intended fifth variable.
eco_clarity_var_list <- c("NN","pos_prop_VB","pos_prop_CC","pos_prop_VBP","pos_prop_VB")
noneco_clarity_var_list <- c("NP","pos_prop_JJ","pos_prop_DT","readability_stats.sylls","fk_grade_level")

# Score `data` with `model` after setting the column `variable` to 0 in every
# row. Returns the class probabilities as a data frame.
predict_with_zeroed_var <- function(model, data, variable) {
  # `data$misspelled <- 0` would silently add a new column and leave the
  # predictions unchanged, so insist that the column already exists.
  if (!variable %in% names(data)) {
    stop("Column '", variable, "' not found in the scoring data", call. = FALSE)
  }
  data[[variable]] <- 0
  predict(model, data, type = "prob") %>% as.data.frame()
}

##ECO-CONTENT: one re-scoring per top variable
probsTest_model_eco_VB <- predict_with_zeroed_var(model_eco_content, mydata, "VB")
probsTest_model_eco_propVB <- predict_with_zeroed_var(model_eco_content, mydata, "pos_prop_VB")
probsTest_model_eco_propNN <- predict_with_zeroed_var(model_eco_content, mydata, "pos_prop_NN")
probsTest_model_eco_propMD <- predict_with_zeroed_var(model_eco_content, mydata, "pos_prop_MD")
probsTest_model_eco_digit <- predict_with_zeroed_var(model_eco_content, mydata, "digit_count")

# results for economist_content model: probability of the "high" class after
# each variable has been zeroed
predict_result_drop_variable <- data.frame(question_index = mydata$question_index,
                                           drop_VB = probsTest_model_eco_VB$high,
                                           drop_propVB = probsTest_model_eco_propVB$high,
                                           drop_propNN = probsTest_model_eco_propNN$high,
                                           drop_MD = probsTest_model_eco_propMD$high,
                                           drop_digit = probsTest_model_eco_digit$high)

# Put the re-scored probabilities side by side with the original predictions.
# Both tables carry question_index, so the key appears twice in the result.
result_compare_eco_contnet_var_drop <- cbind(predict_result_drop_variable, predict_result)

result_compare_eco_contnet_var_drop %>%
  head() %>% kbl() %>%
  kable_paper() %>%
  scroll_box(width = "100%", height = "200px")
question_index drop_VB drop_propVB drop_propNN drop_MD drop_digit question_index eco_content noneco_content
1997_5_1 0.29 0.27 0.38 0.34 0.48 1997_5_1 0.34 0.4766667
1997_5_2 0.29 0.29 0.33 0.23 0.40 1997_5_2 0.29 0.3800000
1997_5_3 0.34 0.33 0.52 0.40 0.40 1997_5_3 0.40 0.2833333
1997_5_4 0.47 0.36 0.55 0.39 0.48 1997_5_4 0.47 0.3666667
1997_5_5 0.63 0.57 0.70 0.55 0.66 1997_5_5 0.63 0.7133333
1997_5_6 0.66 0.58 0.77 0.60 0.67 1997_5_6 0.67 0.6933333

(2) Noneco-Reasoning model

Regenerate model predictions using the non-economist reasoning (content) model after removing the top 5 variables one by one. After that, we save the results as one table, which is mapped back to the original prediction results. A snapshot of the output is:

## NONECO-CONTENT / model_noneco_content

# Top-five variables for the non-economist content model (reference list; the
# code below spells out each variable explicitly). The first entry is
# pos_prop_VB, the column that step 1 actually sets to 0.
noneco_content_var_list <- c(
  "pos_prop_VB", "pos_prop_MD", "pos_prop_JJ", "pos_prop_IN", "pos_prop_NN"
)

# For each variable: take a fresh copy of the SMP data, set that variable's
# column to 0 for every paragraph, and score the copy with the unchanged
# non-economist content model.
# 1: pos_prop_VB
var_test_data <- mydata
var_test_data$pos_prop_VB <- 0
probsTest_model_noneco_propVB <- predict(model_noneco_content, var_test_data, type = "prob") %>%
  as.data.frame()
# 2: pos_prop_MD
var_test_data <- mydata
var_test_data$pos_prop_MD <- 0
probsTest_model_noneco_propMD <- predict(model_noneco_content, var_test_data, type = "prob") %>%
  as.data.frame()
# 3: pos_prop_JJ
var_test_data <- mydata
var_test_data$pos_prop_JJ <- 0
probsTest_model_noneco_propJJ <- predict(model_noneco_content, var_test_data, type = "prob") %>%
  as.data.frame()
# 4: pos_prop_IN
var_test_data <- mydata
var_test_data$pos_prop_IN <- 0
probsTest_model_noneco_propIN <- predict(model_noneco_content, var_test_data, type = "prob") %>%
  as.data.frame()
# 5: pos_prop_NN
var_test_data <- mydata
var_test_data$pos_prop_NN <- 0
probsTest_model_noneco_propNN <- predict(model_noneco_content, var_test_data, type = "prob") %>%
  as.data.frame()

# Results for the non-economist content model: the `high` column of each
# re-scored probability table, one row per paragraph.
noneco_predict_result_drop_variable <- data.frame(
  question_index = mydata$question_index,
  noneco_con_drop_propVB = probsTest_model_noneco_propVB$high,
  noneco_con_drop_propMD = probsTest_model_noneco_propMD$high,
  noneco_con_drop_propJJ = probsTest_model_noneco_propJJ$high,
  noneco_con_drop_propIN = probsTest_model_noneco_propIN$high,
  noneco_con_drop_propNN = probsTest_model_noneco_propNN$high
)

# Append to the economist-content comparison table. cbind() binds by row
# position and every input keeps its own question_index column.
result_compare_var_drop_content <- cbind(
  result_compare_eco_contnet_var_drop,
  noneco_predict_result_drop_variable
)
# write.csv(result_compare_var_drop_content , "result_compare_var_drop_content.csv")


# Show the first rows as a scrollable table
result_compare_var_drop_content %>%
  head() %>% kbl() %>%
  kable_paper() %>%
  scroll_box(width = "100%", height = "200px")
question_index drop_VB drop_propVB drop_propNN drop_MD drop_digit question_index eco_content noneco_content question_index noneco_con_drop_propVB noneco_con_drop_propMD noneco_con_drop_propJJ noneco_con_drop_propIN noneco_con_drop_propNN
1997_5_1 0.29 0.27 0.38 0.34 0.48 1997_5_1 0.34 0.4766667 1997_5_1 0.4533333 0.4766667 0.4466667 0.5166667 0.5100000
1997_5_2 0.29 0.29 0.33 0.23 0.40 1997_5_2 0.29 0.3800000 1997_5_2 0.3833333 0.2466667 0.3200000 0.4366667 0.4433333
1997_5_3 0.34 0.33 0.52 0.40 0.40 1997_5_3 0.40 0.2833333 1997_5_3 0.2600000 0.2833333 0.2600000 0.3500000 0.4333333
1997_5_4 0.47 0.36 0.55 0.39 0.48 1997_5_4 0.47 0.3666667 1997_5_4 0.2800000 0.3033333 0.3266667 0.4400000 0.3766667
1997_5_5 0.63 0.57 0.70 0.55 0.66 1997_5_5 0.63 0.7133333 1997_5_5 0.6400000 0.5466667 0.5800000 0.7300000 0.7166667
1997_5_6 0.66 0.58 0.77 0.60 0.67 1997_5_6 0.67 0.6933333 1997_5_6 0.6400000 0.6033333 0.4666667 0.6766667 0.7233333

3.3 Scoring using clarity models

3.3.1 Model results

Score the SMP paragraphs using the two readability models: the economist readability model and the non-economist readability model. The prediction results are saved in the columns eco_clarity and noneco_clarity, respectively. After that, we join the clarity output table with the content output table to form the final output table. A snapshot of this table is:

## Economist clarity model: score the SMP paragraphs
probsTest_model_eco_clarity <- as.data.frame(
  predict(model_eco_clarity, mydata, type = "prob")
)

# These four features are part of the model dataset but absent from the
# out-of-sample dataset, so create them as all-zero columns before the
# non-economist clarity model is applied.
mydata[["FW"]] <- 0
mydata[["pos_prop_FW"]] <- 0
mydata[["pos_word1_TO"]] <- 0
mydata[["pos_word2_EX"]] <- 0


## Non-economist clarity model: score the SMP paragraphs
probsTest_model_noneco_clarity <- as.data.frame(
  predict(model_noneco_clarity, mydata, type = "prob")
)

## Keep the `high` probability from each clarity model, one row per paragraph
predict_result_clarity <- data.frame(
  question_index = mydata$question_index,
  eco_clarity = probsTest_model_eco_clarity$high,
  noneco_clarity = probsTest_model_noneco_clarity$high
)

# Final output: clarity predictions joined to the content output table
prediction_smp <- predict_result_clarity %>%
  left_join(output_data_content, by = "question_index")

## Take a look at the results
scroll_box(
  kable_paper(kbl(head(prediction_smp))),
  width = "100%", height = "200px"
)
question_index eco_clarity noneco_clarity smp_text_feature_final$question_index eco_content noneco_content paragraph fk_grade_level year month word_count_stats sentence_count FRES_score
1997_5_1 0.48 0.500 1997_5_1 0.34 0.4766667 The economy moved through a period of slower growth in 1996 during which inflationary pressures eased significantly. The March quarter CPI result confirms that underlying inflation has returned to an annual rate of close to 2 per cent, and the prospects of inflation remaining low in the near future appear to be good. With some surplus capacity existing there is scope for the economy to grow more quickly in 1997 without generating significant inflationary pressures, provided growth in labour costs is not excessive. 16.49250 1997 may 80 3 24.31583
1997_5_2 0.37 0.444 1997_5_2 0.29 0.3800000 So far in 1997 there have already been some indications that the pace of growth is picking up, particularly in areas of construction investment, housing and consumer spending. Growth is being supported by several factors including the effect of the three policy easings in the second half of 1996, which brought cash rates down by 1½ percentage points. The impact of those cash rate reductions on home mortgage borrowers has been reinforced by a significant compression of intermediaries’ interest margins in that area. The lower interest rates now in place should be supportive of interest-sensitive areas of activity, particularly housing and non-residential construction, as well as helping household and business cash flows. Other factors favourable to growth at present include the strong US economy, moderately rising commodity prices, and an historically good level of business profitability in many industries. 17.23177 1997 may 137 5 20.32181
1997_5_3 0.78 0.596 1997_5_3 0.40 0.2833333 Conditions in the manufacturing sector have been an exception to this generally firmer picture, with profitability under pressure and investment intentions declining. The pressure on profitability reflects a combination of rising wage costs and flat selling prices, which continue to be constrained in many parts of the manufacturing sector by strong international competition. With the exchange rate having risen in trade-weighted terms, pressures on competitiveness have intensified during the past year. Looking ahead, however, a number of areas of domestic manufacturing are likely to benefit from the expansion in housing and non-residential construction now under way. 17.61583 1997 may 96 4 11.51250
1997_5_4 0.49 0.516 1997_5_4 0.47 0.3666667 Employment growth has lagged behind the overall pace of economic activity. Total employment has been growing at a rate of around 1 per cent in the past year, concentrated in part-time jobs. Nonetheless, the number of job vacancies has increased and employment can be expected to strengthen as the general pace of activity picks up over the course of the year. 11.68000 1997 may 60 3 46.94500
1997_5_5 0.52 0.260 1997_5_5 0.63 0.7133333 The favourable near-term outlook for inflation is being underpinned by continued help from the exchange rate in holding down import prices. Also helpful to the outlook is the result of the recent Safety-Net Review by the Australian Industrial Relations Commission, which delivered only moderate increases in award wages. However, other developments in labour costs are of more concern for the longer-term inflation outlook. Wage increases under enterprise bargaining continue to be in the 4 to 5 per cent range, figures which appear high in a climate of 2 per cent inflation and 8½ per cent unemployment. Aggregate wages data, which encompass workers on award wages, enterprise agreements and other bargaining arrangements, suggest that the overall pace of wages growth has picked up recently. These figures will need to be closely watched to assess the extent to which they represent a significant change in trend but, taken at face value, they suggest that wages growth is becoming uncomfortably high. 15.49661 1997 may 155 6 29.97159
1997_5_6 0.27 0.320 1997_5_6 0.67 0.6933333 The capacity of the economy to grow faster while maintaining low inflation will depend importantly on the future behaviour of wages. A significant acceleration of aggregate wages in response to stronger economic growth would directly curtail job creation. It would also threaten faster inflation – to which monetary policy would have to respond – and thereby put at risk the potential for faster non-inflationary growth. Lessening of wages growth would, on the other hand, enable faster growth without risk of acceleration in inflation. 14.68081 1997 may 83 4 26.76652
## export results
# write.csv(prediction_smp, "smp_prediction_results.csv")

3.3.2 Regenerate predictions after removing top features one by one

In this section, we explore how the predictions of the two readability models change after removing the top 5 features one by one.

(1) Eco-Readability model

Regenerate model predictions using the economist readability (clarity) model after removing the top 5 variables one by one. A snapshot of the output is:

### Zero out one variable at a time to see how the predictions change.
### Each step starts from a fresh copy of mydata and re-scores it with the
### economist clarity model.
# 1: NN
clarity_var_data <- mydata
clarity_var_data[["NN"]] <- 0
drop_var_eco_clarity_NN <- as.data.frame(
  predict(model_eco_clarity, clarity_var_data, type = "prob")
)
# 2: pos_prop_VB
clarity_var_data <- mydata
clarity_var_data[["pos_prop_VB"]] <- 0
drop_var_eco_clarity_propVB <- as.data.frame(
  predict(model_eco_clarity, clarity_var_data, type = "prob")
)
# 3: pos_prop_CC
clarity_var_data <- mydata
clarity_var_data[["pos_prop_CC"]] <- 0
drop_var_eco_clarity_propCC <- as.data.frame(
  predict(model_eco_clarity, clarity_var_data, type = "prob")
)
# 4: pos_prop_VBP
clarity_var_data <- mydata
clarity_var_data[["pos_prop_VBP"]] <- 0
drop_var_eco_clarity_propVBP <- as.data.frame(
  predict(model_eco_clarity, clarity_var_data, type = "prob")
)
# 5: pos_prop_RB
clarity_var_data <- mydata
clarity_var_data[["pos_prop_RB"]] <- 0
drop_var_eco_clarity_propRB <- as.data.frame(
  predict(model_eco_clarity, clarity_var_data, type = "prob")
)


# Collect the `high` probability of every re-scored run, one row per paragraph
drop_var_eco_results <- data.frame(
  question_index = mydata$question_index,
  var_eco_clarity_NN = drop_var_eco_clarity_NN$high,
  var_eco_clarity_propVB = drop_var_eco_clarity_propVB$high,
  var_eco_clarity_propCC = drop_var_eco_clarity_propCC$high,
  var_eco_clarity_propVBP = drop_var_eco_clarity_propVBP$high,
  var_eco_clarity_propRB = drop_var_eco_clarity_propRB$high
)


# Baseline clarity predictions first, re-scored columns after them; cbind()
# binds by row position and keeps the question_index column of both tables.
var_drop_result_eco_clarity <- cbind(predict_result_clarity, drop_var_eco_results)

# Take a look at the data
scroll_box(
  kable_paper(kbl(head(var_drop_result_eco_clarity))),
  width = "100%", height = "200px"
)
question_index eco_clarity noneco_clarity question_index var_eco_clarity_NN var_eco_clarity_propVB var_eco_clarity_propCC var_eco_clarity_propVBP var_eco_clarity_propRB
1997_5_1 0.48 0.500 1997_5_1 0.51 0.46 0.40 0.51 0.60
1997_5_2 0.37 0.444 1997_5_2 0.42 0.36 0.26 0.42 0.55
1997_5_3 0.78 0.596 1997_5_3 0.73 0.72 0.54 0.75 0.80
1997_5_4 0.49 0.516 1997_5_4 0.53 0.52 0.38 0.49 0.59
1997_5_5 0.52 0.260 1997_5_5 0.53 0.48 0.38 0.44 0.60
1997_5_6 0.27 0.320 1997_5_6 0.26 0.38 0.28 0.25 0.39
## export results
#save the data
#write.csv(var_drop_result_eco_clarity, "drop_var_eco_clarity_results.csv")

(2) Noneco-Readability model

Regenerate model predictions using the non-economist readability (clarity) model after removing the top 5 variables one by one. A snapshot of the output is:

# Zero out one variable at a time: each step starts from a fresh copy of
# mydata and re-scores it with the non-economist clarity model.
# 1: NP
clarity_var_data <- mydata
clarity_var_data[["NP"]] <- 0
drop_var_noneco_clarity_NP <- as.data.frame(
  predict(model_noneco_clarity, clarity_var_data, type = "prob")
)
# 2: pos_prop_JJ
clarity_var_data <- mydata
clarity_var_data[["pos_prop_JJ"]] <- 0
drop_var_noneco_clarity_propJJ <- as.data.frame(
  predict(model_noneco_clarity, clarity_var_data, type = "prob")
)
# 3: pos_prop_DT
clarity_var_data <- mydata
clarity_var_data[["pos_prop_DT"]] <- 0
drop_var_noneco_clarity_propDT <- as.data.frame(
  predict(model_noneco_clarity, clarity_var_data, type = "prob")
)
# 4: readability_stats.sylls
clarity_var_data <- mydata
clarity_var_data[["readability_stats.sylls"]] <- 0
drop_var_noneco_clarity_sylls <- as.data.frame(
  predict(model_noneco_clarity, clarity_var_data, type = "prob")
)
# 5: fk_grade_level
clarity_var_data <- mydata
clarity_var_data[["fk_grade_level"]] <- 0
drop_var_noneco_clarity_fk <- as.data.frame(
  predict(model_noneco_clarity, clarity_var_data, type = "prob")
)


# Collect the `high` probability of every re-scored run, one row per paragraph
drop_var_noneco_results <- data.frame(
  question_index = mydata$question_index,
  var_noneco_clarity_NP = drop_var_noneco_clarity_NP$high,
  var_noneco_clarity_propJJ = drop_var_noneco_clarity_propJJ$high,
  var_noneco_clarity_propDT = drop_var_noneco_clarity_propDT$high,
  var_noneco_clarity_sylls = drop_var_noneco_clarity_sylls$high,
  var_noneco_clarity_fk = drop_var_noneco_clarity_fk$high
)

# Baseline predictions of the two clarity models, one row per paragraph
predict_result_clarity <- data.frame(
  question_index = mydata$question_index,
  eco_clarity = probsTest_model_eco_clarity$high,
  noneco_clarity = probsTest_model_noneco_clarity$high
)


# Baseline columns first, re-scored columns after them; cbind() binds by row
# position and keeps the question_index column of both tables.
var_drop_result <- cbind(predict_result_clarity, drop_var_noneco_results)

## Take a look at the result
scroll_box(
  kable_paper(kbl(head(var_drop_result))),
  width = "100%", height = "200px"
)
question_index eco_clarity noneco_clarity question_index var_noneco_clarity_NP var_noneco_clarity_propJJ var_noneco_clarity_propDT var_noneco_clarity_sylls var_noneco_clarity_fk
1997_5_1 0.48 0.500 1997_5_1 0.488 0.536 0.492 0.480 0.492
1997_5_2 0.37 0.444 1997_5_2 0.464 0.468 0.456 0.488 0.476
1997_5_3 0.78 0.596 1997_5_3 0.596 0.592 0.572 0.596 0.592
1997_5_4 0.49 0.516 1997_5_4 0.540 0.540 0.500 0.504 0.472
1997_5_5 0.52 0.260 1997_5_5 0.308 0.256 0.284 0.324 0.300
1997_5_6 0.27 0.320 1997_5_6 0.396 0.360 0.344 0.356 0.340
## export results

##save the data
# write.csv(var_drop_result, "clarity_model_var_drop_result.csv")

4 Score Cross-sectional text

Conduct a cross-sectional comparison by scoring sample paragraphs from various sources, including the Bank of England (BOE) Inflation Report Introduction and Boxes, RBA Speeches, articles from The Economist, and the RBA SMP Introduction and Boxes published in 2018 and 2019. The output is saved in the data output folder, and the results are discussed in Section 7.2 of the paper.

4.1 Prepare data

Import the sample paragraphs with all text features. A snapshot of the text data is shown below:

# Load the cross-sectional paragraphs together with their text features
cross_data <- readRDS("./data_input/cross_analysis_feature1_final.rds")

# Derived ratios: words per sentence and syllables per word
cross_data$word_per_sentence <- cross_data$word_count_stats / cross_data$sentence_count
cross_data$sylls_per_word <- cross_data$readability_stats.sylls / cross_data$word_count_stats
# Every missing value becomes 0
cross_data[is.na(cross_data)] <- 0
# Drop the two index columns
cross_data <- select(cross_data, -index, -index.y)

# Rename the word_1st_/word_2nd_/word_3rd_ prefixes to
# pos_word1_/pos_word2_/pos_word3_, and replace any "$" in a column name by "ds"
names(cross_data) <- gsub("word_1st_", "pos_word1_", names(cross_data))
names(cross_data) <- gsub("word_2nd_", "pos_word2_", names(cross_data))
names(cross_data) <- gsub("word_3rd_", "pos_word3_", names(cross_data))
names(cross_data) <- gsub('[$]', 'ds', names(cross_data))

# Show the first rows as a scrollable table
scroll_box(
  kable_paper(kbl(head(cross_data))),
  width = "100%", height = "200px"
)
text_source autor year paragraph question_index paragraph_clean index.x word_count_stats sentence_count readability_stats.sylls readability_stats.polys fk_grade_level FRES_score comma_count punc_count digit_count CC CD DT IN JJ MD NN NNS POS PRP PRPds RB TO VB VBG VBN VBP VBZ WDT WRB VBD EX WPds JJR RP JJS NNPS RBR RBS PDT FW WP NNP UH pos_prop_CC pos_prop_CD pos_prop_DT pos_prop_IN pos_prop_JJ pos_prop_MD pos_prop_NN pos_prop_NNS pos_prop_POS pos_prop_PRP pos_prop_PRPds pos_prop_RB pos_prop_TO pos_prop_VB pos_prop_VBG pos_prop_VBN pos_prop_VBP pos_prop_VBZ pos_prop_WDT pos_prop_WRB pos_prop_VBD pos_prop_EX pos_prop_WPds pos_prop_JJR pos_prop_RP pos_prop_JJS pos_prop_NNPS pos_prop_RBR pos_prop_RBS pos_prop_PDT pos_prop_FW pos_prop_WP pos_prop_NNP pos_prop_UH sent_1st_CC sent_1st_DT sent_1st_IN sent_1st_JJ sent_1st_NN sent_1st_NNS sent_1st_TO sent_1st_VBD sent_1st_VBG sent_1st_CD sent_1st_JJR sent_1st_PRP sent_1st_VB sent_1st_VBZ sent_1st_MD sent_1st_RB sent_1st_RBR sent_1st_VBN sent_1st_VBP sent_1st_POS sent_1st_PRPds sent_1st_WDT sent_1st_JJS sent_1st_RP sent_1st_WRB sent_1st_EX sent_1st_PDT sent_1st_NNP sent_1st_RBS sent_1st_WP sent_1st_FW sent_1st_prop_CC sent_1st_prop_DT sent_1st_prop_IN sent_1st_prop_JJ sent_1st_prop_NN sent_1st_prop_NNS sent_1st_prop_TO sent_1st_prop_VBD sent_1st_prop_VBG sent_1st_prop_CD sent_1st_prop_JJR sent_1st_prop_PRP sent_1st_prop_VB sent_1st_prop_VBZ sent_1st_prop_MD sent_1st_prop_RB sent_1st_prop_RBR sent_1st_prop_VBN sent_1st_prop_VBP sent_1st_prop_POS sent_1st_prop_PRPds sent_1st_prop_WDT sent_1st_prop_JJS sent_1st_prop_RP sent_1st_prop_WRB sent_1st_prop_EX sent_1st_prop_PDT sent_1st_prop_NNP sent_1st_prop_RBS sent_1st_prop_WP sent_1st_prop_FW start_word_pos second_word_pos third_word_pos sent1st_clue_Attitudinal sent1st_clue_connective sent1st_clue_Contrast sent1st_clue_detail sent1st_clue_inference sent1st_clue_reformulation sent1st_clue_summary sent1st_clue_transition sentlast_clue_Attitudinal sentlast_clue_connective 
sentlast_clue_Contrast sentlast_clue_detail sentlast_clue_emphasis sentlast_clue_inference sentlast_clue_reformulation sentlast_clue_summary sentlast_clue_transition sentmiddle_clue_Attitudinal sentmiddle_clue_connective sentmiddle_clue_Contrast sentmiddle_clue_detail sentmiddle_clue_emphasis sentmiddle_clue_inference sentmiddle_clue_reformulation sentmiddle_clue_summary sent_1st_word_ds sent_1st_word_, sent_1st_word_CC sent_1st_word_CD sent_1st_word_DT sent_1st_word_EX sent_1st_word_FW sent_1st_word_IN sent_1st_word_JJ sent_1st_word_JJR sent_1st_word_JJS sent_1st_word_MD sent_1st_word_NN sent_1st_word_NNP sent_1st_word_NNS sent_1st_word_PDT sent_1st_word_POS sent_1st_word_PRP sent_1st_word_PRPds sent_1st_word_RB sent_1st_word_RBR sent_1st_word_RBS sent_1st_word_RP sent_1st_word_TO sent_1st_word_VB sent_1st_word_VBD sent_1st_word_VBG sent_1st_word_VBN sent_1st_word_VBP sent_1st_word_VBZ sent_1st_word_WDT sent_1st_word_WP sent_1st_word_WPds sent_1st_word_WRB sent_1st_parse_ADJP sent_1st_parse_ADVP sent_1st_parse_NP sent_1st_parse_PP sent_1st_parse_S sent_1st_parse_SBAR sent_1st_parse_SINV sent_1st_parse_VP sent_1st_parse_WHADVP sent_1st_parse_WHNP sent_1st_parse_WHPP sent_1st_parse_SBARQ sent_1st_parse_SQ sent_last_parse_ADJP sent_last_parse_ADVP sent_last_parse_NP sent_last_parse_PP sent_last_parse_S sent_last_parse_SBAR sent_last_parse_SINV sent_last_parse_VP sent_last_parse_WHADVP sent_last_parse_WHNP sent_last_parse_WHPP sent_last_parse_SBARQ sent_last_parse_SQ ADJP ADVP NP PP S SBAR SINV VP WHADVP WHNP WHPP SBARQ SQ pos_word1_, pos_word1_0 pos_word1_CC pos_word1_CD pos_word1_DT pos_word1_EX pos_word1_FW pos_word1_IN pos_word1_JJ pos_word1_JJR pos_word1_JJS pos_word1_MD pos_word1_NN pos_word1_NNP pos_word1_NNS pos_word1_PDT pos_word1_PRP pos_word1_PRPds pos_word1_RB pos_word1_RBR pos_word1_RBS pos_word1_RP pos_word1_TO pos_word1_VB pos_word1_VBD pos_word1_VBG pos_word1_VBN pos_word1_VBP pos_word1_VBZ pos_word1_WDT pos_word1_WP pos_word1_WRB pos_word2_CC 
pos_word2_CD pos_word2_DT pos_word2_EX pos_word2_FW pos_word2_IN pos_word2_JJ pos_word2_JJR pos_word2_JJS pos_word2_LS pos_word2_MD pos_word2_NN pos_word2_NNP pos_word2_NNS pos_word2_PRP pos_word2_PRPds pos_word2_RB pos_word2_RBS pos_word2_RP pos_word2_TO pos_word2_VB pos_word2_VBD pos_word2_VBG pos_word2_VBN pos_word2_VBP pos_word2_VBZ pos_word2_WDT pos_word2_WP pos_word2_WRB pos_word2_<NA> pos_word3_, pos_word3_0 pos_word3_CC pos_word3_CD pos_word3_DT pos_word3_EX pos_word3_FW pos_word3_IN pos_word3_JJ pos_word3_JJR pos_word3_JJS pos_word3_MD pos_word3_NN pos_word3_NNP pos_word3_NNS pos_word3_PDT pos_word3_PRP pos_word3_PRPds pos_word3_RB pos_word3_RBR pos_word3_RBS pos_word3_RP pos_word3_TO pos_word3_VB pos_word3_VBD pos_word3_VBG pos_word3_VBN pos_word3_VBP pos_word3_VBZ pos_word3_WDT pos_word3_WP pos_word3_WRB word_per_sentence sylls_per_word
speech Guy Debelle[*]Deputy Governor 2018 When reading through the Bank’s forecasts, I think it is useful to avoid false precision. An important question to ask is: are these revisions to the Bank’s outlook consequential for the monetary policy decision? Similarly you can ask, do I think these changes affect my own decisions about my household or my business? A tenth or two of a percentage point here or there on the outlook for GDP or inflation is unlikely to matter that much for any of those decisions. Often, these revisions reflect the new information that has come to hand over the previous quarter. This leads us to reassess the starting point for our forecasts of where the economy is then likely to go. In making this assessment, we ask whether the incoming data have been view-changing or view-validating. Over the previous three months , the data have generally been view-validating. 2 When reading through the Bank’s forecasts, I think it is useful to avoid false precision. An important question to ask is: are these revisions to the Bank’s outlook consequential for the monetary policy decision? Similarly you can ask, do I think these changes affect my own decisions about my household or my business? A tenth or two of a percentage point here or there on the outlook for GDP or inflation is unlikely to matter that much for any of those decisions. Often, these revisions reflect the new information that has come to hand over the previous quarter. This leads us to reassess the starting point for our forecasts of where the economy is then likely to go. In making this assessment, we ask whether the incoming data have been view-changing or view-validating. Over the previous three months , the data have generally been view-validating. 
1 144 8 233 25 10.523056 51.67750 5 14 0 5 2 21 15 15 1 21 10 2 6 4 6 7 9 3 3 7 6 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3.42 1.37 14.38 10.27 10.27 0.68 14.38 6.85 1.37 4.11 2.74 4.11 4.79 6.16 2.05 2.05 4.79 4.11 0.68 1.37 0.00 0.00 0.00 0.00 0.00 0.00 0 0 0 0 0 0 0 0 1 5 4 1 6 3 2 0 2 1 1 1 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3.33 16.67 13.33 3.33 20.00 10.00 6.67 0.00 6.67 3.33 3.33 3.33 6.67 3.33 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0 0 0 0 0 0 0 0 0 DT VBP NN 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 2 1 1 0 0 1 0 0 0 0 0 1 0 3 1 2 1 0 4 0 0 0 0 0 6 6 86 32 21 6 0 28 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 18.00000 1.618056
speech Guy Debelle[*]Deputy Governor 2018 As I just said, the forecasts we have just published are little changed from those we published in February. Table 1 shows the Bank’s outlook for GDP growth, the unemployment rate and inflation until June 2020. This is generally the time horizon that is relevant for the Board’s deliberations on monetary policy. 4 As I just said, the forecasts we have just published are little changed from those we published in February. Table 1 shows the Bank’s outlook for GDP growth, the unemployment rate and inflation until June 2020. This is generally the time horizon that is relevant for the Board’s deliberations on monetary policy. 2 50 3 87 10 11.442000 42.71433 2 5 5 1 2 7 7 3 0 14 2 2 2 0 4 0 0 0 3 2 3 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1.85 3.70 12.96 12.96 5.56 0.00 25.93 3.70 3.70 3.70 0.00 7.41 0.00 0.00 0.00 5.56 3.70 5.56 1.85 0.00 1.85 0.00 0.00 0.00 0.00 0.00 0 0 0 0 0 0 0 0 1 2 3 3 4 3 1 0 0 0 1 0 1 1 0 2 1 1 2 0 0 0 0 0 0 0 0 0 0 0 0 3.85 7.69 11.54 11.54 15.38 11.54 3.85 0.00 0.00 0.00 3.85 0.00 3.85 3.85 0.00 7.69 3.85 3.85 7.69 0.00 0.00 0.00 0 0 0 0 0 0 0 0 0 JJ JJ DT 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 9 3 3 1 0 6 0 0 0 0 0 0 0 2 1 1 0 0 2 0 1 0 1 1 0 0 25 11 9 3 0 19 0 2 0 1 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 16.66667 1.740000
speech Guy Debelle[*]Deputy Governor 2018 Other developments in financial conditions can affect the forecasts too. One example is developments in money market rates. In the SMP, we document the recent rise in money market interest rates in the US, particularly LIBOR . There are a number of explanations for the rise, including a large increase in bill issuance by the US Treasury and the effect of various tax changes on investment decisions by CFOs at some US companies with large cash pools. This increases the wholesale funding costs for the Australian banks, as well as increasing the costs for borrowers whose lending rates are priced off BBSW, which includes many corporates. 7 Other developments in financial conditions can affect the forecasts too. One example is developments in money market rates. In the SMP, we document the recent rise in money market interest rates in the US, particularly LIBOR . There are a number of explanations for the rise, including a large increase in bill issuance by the US Treasury and the effect of various tax changes on investment decisions by CFOs at some US companies with large cash pools. This increases the wholesale funding costs for the Australian banks, as well as increasing the costs for borrowers whose lending rates are priced off BBSW, which includes many corporates. 
3 105 5 173 22 12.041905 46.13143 5 5 0 1 1 14 18 9 1 23 18 0 4 0 4 0 1 2 1 2 3 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0.95 0.95 13.33 17.14 8.57 0.95 21.90 17.14 0.00 3.81 0.00 3.81 0.00 0.95 1.90 0.95 1.90 2.86 0.95 0.00 0.00 0.95 0.95 0.00 0.00 0.00 0 0 0 0 0 0 0 0 1 5 10 3 11 5 2 2 0 1 0 0 2 0 0 1 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 2.17 10.87 21.74 6.52 23.91 10.87 4.35 4.35 0.00 2.17 0.00 0.00 4.35 0.00 0.00 2.17 2.17 2.17 0.00 2.17 0.00 0.00 0 0 0 0 0 0 0 0 0 NN VB TO 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 12 6 3 0 0 4 0 0 0 0 0 1 1 2 1 2 0 0 3 0 0 0 0 0 3 5 40 15 15 3 0 20 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 21.00000 1.647619
speech Guy Debelle[*]Deputy Governor 2018 However, the effect to date has not been that large in terms of the overall impact on bank funding costs. Thus far, it has not been a consequential development from a forecasting point of view. It is not clear how much of the rise in LIBOR is due to structural changes in money markets and how much is temporary. In the last couple of weeks, these money market rates have declined noticeably from their peaks. We will continue to monitor how this unfolds in the period ahead. 8 However, the effect to date has not been that large in terms of the overall impact on bank funding costs. Thus far, it has not been a consequential development from a forecasting point of view. It is not clear how much of the rise in LIBOR is due to structural changes in money markets and how much is temporary. In the last couple of weeks, these money market rates have declined noticeably from their peaks. We will continue to monitor how this unfolds in the period ahead. 4 87 5 127 11 8.421287 65.67745 3 5 0 1 0 9 13 9 1 16 7 0 3 1 9 3 2 0 3 1 6 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1.15 0.00 10.34 14.94 10.34 1.15 18.39 8.05 0.00 3.45 1.15 10.34 3.45 2.30 0.00 3.45 1.15 6.90 0.00 3.45 0.00 0.00 0.00 0.00 0.00 0.00 0 0 0 0 0 0 0 0 1 1 3 1 5 2 0 0 0 0 0 0 2 0 1 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 5.26 5.26 15.79 5.26 26.32 10.53 0.00 0.00 0.00 0.00 0.00 0.00 10.53 0.00 5.26 0.00 0.00 5.26 0.00 0.00 5.26 5.26 0 0 0 0 0 0 0 0 0 IN JJ IN 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 12 4 2 1 0 2 0 0 0 0 0 1 1 18 7 3 2 0 4 0 0 0 0 0 7 4 69 28 18 6 0 25 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 17.40000 1.459770
speech Guy Debelle[*]Deputy Governor 2018 The outlook for the economy in 2018 and 2019 is expected to be a little stronger than occurred in 2017. GDP growth is expected to pick up from around 2½ per cent currently to be around 3¼ over the next couple of years. 9 The outlook for the economy in 2018 and 2019 is expected to be a little stronger than occurred in 2017. GDP growth is expected to pick up from around 2½ per cent currently to be around 3¼ over the next couple of years. 5 38 2 60 4 10.451579 53.97105 0 2 14 1 5 4 10 1 0 6 1 0 0 0 2 3 3 0 3 0 2 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 2.33 11.63 9.30 23.26 2.33 0.00 13.95 2.33 0.00 0.00 0.00 4.65 6.98 6.98 0.00 6.98 0.00 4.65 0.00 0.00 0.00 0.00 0.00 2.33 2.33 0.00 0 0 0 0 0 0 0 0 1 3 3 5 12 1 1 0 1 0 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 3.33 10.00 10.00 16.67 40.00 3.33 3.33 0.00 3.33 0.00 0.00 0.00 0.00 3.33 0.00 0.00 0.00 3.33 0.00 3.33 0.00 0.00 0 0 0 0 0 0 0 0 0 VBG IN CC 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 5 1 4 1 0 5 0 0 0 0 0 0 0 3 1 3 1 0 5 0 0 0 0 0 0 1 8 2 7 2 0 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 19.00000 1.578947
speech Guy Debelle[*]Deputy Governor 2018 One part of the answer is that the global conjuncture is constructive. We have been witnessing the most synchronised pick-up in the global economy for quite some time. The US economy is doing well. Europe is doing better than it has been for the past decade. The Japanese economy has recorded its longest period of quarterly growth in almost three decades. The global conjuncture has been reflected in a pick-up in global industrial production and trade that is particularly beneficial to the east Asian region, which is leveraged to the global cycle. 11 One part of the answer is that the global conjuncture is constructive. We have been witnessing the most synchronised pick-up in the global economy for quite some time. The US economy is doing well. Europe is doing better than it has been for the past decade. The Japanese economy has recorded its longest period of quarterly growth in almost three decades. The global conjuncture has been reflected in a pick-up in global industrial production and trade that is particularly beneficial to the east Asian region, which is leveraged to the global cycle. 
6 92 6 161 20 11.040000 43.22167 1 8 0 1 2 12 10 14 0 18 1 0 3 1 4 2 0 3 6 1 9 2 0 0 0 0 1 0 2 0 0 0 0 0 0 0 0 1.09 2.17 13.04 10.87 15.22 0.00 19.57 1.09 0.00 3.26 1.09 4.35 2.17 0.00 3.26 6.52 1.09 9.78 2.17 0.00 0.00 0.00 0.00 1.09 0.00 2.17 0 0 0 0 0 0 0 0 1 1 3 1 4 2 0 0 0 0 1 0 0 1 0 1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 5.88 5.88 17.65 5.88 23.53 11.76 0.00 0.00 0.00 0.00 5.88 0.00 0.00 5.88 0.00 5.88 0.00 0.00 0.00 5.88 5.88 0.00 0 0 0 0 0 0 0 0 0 DT VBP VBD 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 10 5 1 0 0 1 0 0 0 0 0 3 1 19 7 5 2 0 5 0 0 0 0 0 5 3 57 23 15 2 0 15 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 15.33333 1.750000

4.2 Scoring using reasoning models

Score the cross-sectional dataset using reasoning models. A snapshot of the table is shown as below:

# Score cross_data with both content models; type = "prob" asks predict()
# for class probabilities, which are stored as data frames.
cross_content_model_eco <- as.data.frame(
  predict(model_eco_content, cross_data, type = "prob")
)
cross_content_model_noneco <- as.data.frame(
  predict(model_noneco_content, cross_data, type = "prob")
)

# Keep the "high" probability column of each model, keyed by question_index.
cross_predict_result <- data.frame(
  question_index = cross_data$question_index,
  eco_content_cross = cross_content_model_eco$high,
  noneco_content_cross = cross_content_model_noneco$high
)

# Append fk_grade_level, FRES_score and the source columns from cross_data.
cross_output_data_content <- base::cbind(
  cross_predict_result,
  fk_grade_level = cross_data$fk_grade_level,
  FRES_score = cross_data$FRES_score,
  text_source = cross_data$text_source,
  autor = cross_data$autor,
  year = cross_data$year
)

# Optional export (disabled):
# write.csv(cross_output_data_content,"cross_output_data_content.csv")

# Preview the first rows in a scrollable table.
cross_output_data_content %>%
  head() %>%
  kbl() %>%
  kable_paper() %>%
  scroll_box(width = "100%", height = "200px")
question_index eco_content_cross noneco_content_cross fk_grade_level FRES_score text_source autor year
2 0.85 0.8500000 10.523056 51.67750 speech Guy Debelle[*]Deputy Governor 2018
4 0.29 0.2966667 11.442000 42.71433 speech Guy Debelle[*]Deputy Governor 2018
7 0.67 0.4933333 12.041905 46.13143 speech Guy Debelle[*]Deputy Governor 2018
8 0.74 0.6966667 8.421287 65.67745 speech Guy Debelle[*]Deputy Governor 2018
9 0.36 0.3933333 10.451579 53.97105 speech Guy Debelle[*]Deputy Governor 2018
11 0.38 0.5266667 11.040000 43.22167 speech Guy Debelle[*]Deputy Governor 2018

4.3 Scoring using readability models

Regenerate the predictions using the readability models. A snapshot of the table is shown below:

# Class probabilities from the eco clarity model.
cross_clarity_model_eco <- as.data.frame(
  predict(model_eco_clarity, cross_data, type = "prob")
)

# pos_word2_RBR is set to 0 for every row before the noneco clarity model is
# applied (presumably the model requires this predictor and the
# cross-sectional features do not contain it -- TODO confirm).
cross_data$pos_word2_RBR <- 0
cross_clarity_model_noneco <- as.data.frame(
  predict(model_noneco_clarity, cross_data, type = "prob")
)

# Keep the "high" probability column of each model, keyed by question_index.
cross_predict_result_clarity <- data.frame(
  question_index = cross_data$question_index,
  eco_clarity_cross = cross_clarity_model_eco$high,
  noneco_clarity_cross = cross_clarity_model_noneco$high
)

# Attach the clarity scores to the content-model output table.
cross_score_result <- left_join(
  cross_output_data_content,
  cross_predict_result_clarity,
  by = "question_index"
)

# Preview the first rows in a scrollable table.
cross_score_result %>%
  head() %>%
  kbl() %>%
  kable_paper() %>%
  scroll_box(width = "100%", height = "200px")
question_index eco_content_cross noneco_content_cross fk_grade_level FRES_score text_source autor year eco_clarity_cross noneco_clarity_cross
2 0.85 0.8500000 10.523056 51.67750 speech Guy Debelle[*]Deputy Governor 2018 0.60 0.416
4 0.29 0.2966667 11.442000 42.71433 speech Guy Debelle[*]Deputy Governor 2018 0.49 0.684
7 0.67 0.4933333 12.041905 46.13143 speech Guy Debelle[*]Deputy Governor 2018 0.45 0.572
8 0.74 0.6966667 8.421287 65.67745 speech Guy Debelle[*]Deputy Governor 2018 0.45 0.608
9 0.36 0.3933333 10.451579 53.97105 speech Guy Debelle[*]Deputy Governor 2018 0.40 0.676
11 0.38 0.5266667 11.040000 43.22167 speech Guy Debelle[*]Deputy Governor 2018 0.47 0.400
#save the output
# write.csv(cross_score_result,"cross_predict_result.csv")

5 RBA speeches

Extract the speech data to see how the scores change within a document. The results are discussed in Section 7.3 of the paper.

# Rows of cross_data whose text_source is "speech", plus a narrower view of
# the identifying columns. Neither object is used again in this chunk.
# dplyr verbs are namespaced below because plyr is attached after dplyr, so a
# bare mutate() would resolve to plyr::mutate().
speech_data <- cross_data %>%
  dplyr::filter(text_source == "speech")
speech_data_short <- speech_data %>%
  select(question_index, index.x, text_source, year, autor, paragraph)

## find the speech title for each speech and the order of paragraphs
speech_source <- read.csv("./data_input/speech_full.csv")
speech_model_result <- cross_score_result %>%
  dplyr::filter(text_source == "speech")

## Join the speech model results and the speech source data using
## question_index
speech_scores <- left_join(
  speech_model_result,
  speech_source,
  by = "question_index"
)

# Normalise whitespace in both paragraph columns so they can be compared
# as plain strings (str_squish() returns character vectors).
speech_scores$paragraph <- stringr::str_squish(speech_scores$paragraph)
speech_scores$cross_data.paragraph <-
  stringr::str_squish(speech_scores$cross_data.paragraph)

# Flag whether the two paragraph columns agree. Rows where either paragraph
# is NA get check = NA rather than "no", so they do not show up in a
# filter(check == "no").
para_check <- speech_scores %>%
  select(question_index, paragraph, cross_data.paragraph) %>%
  dplyr::mutate(
    check = ifelse(paragraph == cross_data.paragraph, "yes", "no")
  )

# ## check
# para_check %>% filter(check == "no") # the output is NA, which means that the source are correctly found for each paragraph

## clean the data a little bit: drop helper/duplicated columns brought in by
## the join and keep only rows that have a paragraph
speech_scores_clean <- speech_scores %>%
  select(
    -cross_data.question_index, -cross_data.paragraph,
    -para_first50_letters, -X.1,
    -cross_data.fk_grade_level, -year.y,
    -cross_data.text_source, -X
  ) %>%
  dplyr::filter(!is.na(paragraph))

# Preview the first rows in a scrollable table.
speech_scores_clean %>%
  head() %>%
  kbl() %>%
  kable_paper() %>%
  scroll_box(width = "100%", height = "200px")
question_index eco_content_cross noneco_content_cross fk_grade_level FRES_score text_source autor year.x eco_clarity_cross noneco_clarity_cross index cross_data.FRES_score cross_data.autor web_link para_order author event paragraph
2 0.85 0.8500000 10.523056 51.67750 speech Guy Debelle[*]Deputy Governor 2018 0.60 0.416 363 51.67750 Guy Debelle[*]Deputy Governor G:/Research/JoanH/20181022_SMP Project/data-paragraphs/speeches/-dg-2018-05-15-1.csv 2 Guy Debelle[*]Deputy Governor Opening Keynote at the CFO Forum Sydney – 15 May 2018 When reading through the Bank’s forecasts, I think it is useful to avoid false precision. An important question to ask is: are these revisions to the Bank’s outlook consequential for the monetary policy decision? Similarly you can ask, do I think these changes affect my own decisions about my household or my business? A tenth or two of a percentage point here or there on the outlook for GDP or inflation is unlikely to matter that much for any of those decisions. Often, these revisions reflect the new information that has come to hand over the previous quarter. This leads us to reassess the starting point for our forecasts of where the economy is then likely to go. In making this assessment, we ask whether the incoming data have been view-changing or view-validating. Over the previous three months , the data have generally been view-validating.
4 0.29 0.2966667 11.442000 42.71433 speech Guy Debelle[*]Deputy Governor 2018 0.49 0.684 364 42.71433 Guy Debelle[*]Deputy Governor G:/Research/JoanH/20181022_SMP Project/data-paragraphs/speeches/-dg-2018-05-15-1.csv 4 Guy Debelle[*]Deputy Governor Opening Keynote at the CFO Forum Sydney – 15 May 2018 As I just said, the forecasts we have just published are little changed from those we published in February. Table 1 shows the Bank’s outlook for GDP growth, the unemployment rate and inflation until June 2020. This is generally the time horizon that is relevant for the Board’s deliberations on monetary policy.
7 0.67 0.4933333 12.041905 46.13143 speech Guy Debelle[*]Deputy Governor 2018 0.45 0.572 365 46.13143 Guy Debelle[*]Deputy Governor G:/Research/JoanH/20181022_SMP Project/data-paragraphs/speeches/-dg-2018-05-15-1.csv 7 Guy Debelle[*]Deputy Governor Opening Keynote at the CFO Forum Sydney – 15 May 2018 Other developments in financial conditions can affect the forecasts too. One example is developments in money market rates. In the SMP, we document the recent rise in money market interest rates in the US, particularly LIBOR . There are a number of explanations for the rise, including a large increase in bill issuance by the US Treasury and the effect of various tax changes on investment decisions by CFOs at some US companies with large cash pools. This increases the wholesale funding costs for the Australian banks, as well as increasing the costs for borrowers whose lending rates are priced off BBSW, which includes many corporates.
8 0.74 0.6966667 8.421287 65.67745 speech Guy Debelle[*]Deputy Governor 2018 0.45 0.608 366 65.67745 Guy Debelle[*]Deputy Governor G:/Research/JoanH/20181022_SMP Project/data-paragraphs/speeches/-dg-2018-05-15-1.csv 8 Guy Debelle[*]Deputy Governor Opening Keynote at the CFO Forum Sydney – 15 May 2018 However, the effect to date has not been that large in terms of the overall impact on bank funding costs. Thus far, it has not been a consequential development from a forecasting point of view. It is not clear how much of the rise in LIBOR is due to structural changes in money markets and how much is temporary. In the last couple of weeks, these money market rates have declined noticeably from their peaks. We will continue to monitor how this unfolds in the period ahead.
9 0.36 0.3933333 10.451579 53.97105 speech Guy Debelle[*]Deputy Governor 2018 0.40 0.676 367 53.97105 Guy Debelle[*]Deputy Governor G:/Research/JoanH/20181022_SMP Project/data-paragraphs/speeches/-dg-2018-05-15-1.csv 9 Guy Debelle[*]Deputy Governor Opening Keynote at the CFO Forum Sydney – 15 May 2018 The outlook for the economy in 2018 and 2019 is expected to be a little stronger than occurred in 2017. GDP growth is expected to pick up from around 2½ per cent currently to be around 3¼ over the next couple of years.
11 0.38 0.5266667 11.040000 43.22167 speech Guy Debelle[*]Deputy Governor 2018 0.47 0.400 368 43.22167 Guy Debelle[*]Deputy Governor G:/Research/JoanH/20181022_SMP Project/data-paragraphs/speeches/-dg-2018-05-15-1.csv 11 Guy Debelle[*]Deputy Governor Opening Keynote at the CFO Forum Sydney – 15 May 2018 One part of the answer is that the global conjuncture is constructive. We have been witnessing the most synchronised pick-up in the global economy for quite some time. The US economy is doing well. Europe is doing better than it has been for the past decade. The Japanese economy has recorded its longest period of quarterly growth in almost three decades. The global conjuncture has been reflected in a pick-up in global industrial production and trade that is particularly beneficial to the east Asian region, which is leveraged to the global cycle.
## export results

# write.csv(speech_scores_clean, "speech_scores_clean.csv")

6 Session information

The session information for this program is shown below.

# Print the R version, platform and package versions used for this run.
utils::sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 17763)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_Australia.1252  LC_CTYPE=English_Australia.1252   
## [3] LC_MONETARY=English_Australia.1252 LC_NUMERIC=C                      
## [5] LC_TIME=English_Australia.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] kableExtra_1.3.4    stringr_1.4.0       plyr_1.8.6         
##  [4] questionr_0.7.4     klaR_0.6-15         MASS_7.3-53        
##  [7] PRROC_1.3.1         randomForest_4.6-14 dplyr_1.0.6        
## [10] tidyr_1.1.3         caret_6.0-86        ggplot2_3.3.3      
## [13] lattice_0.20-41    
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.2           viridisLite_0.4.0    splines_4.0.3       
##  [4] foreach_1.5.1        prodlim_2019.11.13   shiny_1.6.0         
##  [7] assertthat_0.2.1     highr_0.9            stats4_4.0.3        
## [10] yaml_2.2.1           ipred_0.9-11         pillar_1.6.0        
## [13] glue_1.4.2           pROC_1.17.0.1        digest_0.6.27       
## [16] promises_1.2.0.1     rvest_1.0.0          colorspace_2.0-1    
## [19] recipes_0.1.16       htmltools_0.5.1.1    httpuv_1.6.1        
## [22] Matrix_1.2-18        timeDate_3043.102    pkgconfig_2.0.3     
## [25] labelled_2.8.0       haven_2.4.1          purrr_0.3.4         
## [28] xtable_1.8-4         webshot_0.5.2        scales_1.1.1        
## [31] svglite_2.0.0        later_1.2.0          gower_0.2.2         
## [34] lava_1.6.9           tibble_3.1.1         combinat_0.0-8      
## [37] generics_0.1.0       ellipsis_0.3.2       withr_2.4.2         
## [40] nnet_7.3-14          survival_3.2-7       magrittr_2.0.1      
## [43] crayon_1.4.1         mime_0.10            evaluate_0.14       
## [46] fansi_0.4.2          nlme_3.1-149         xml2_1.3.2          
## [49] forcats_0.5.1        class_7.3-17         tools_4.0.3         
## [52] data.table_1.14.0    hms_1.0.0            lifecycle_1.0.0     
## [55] munsell_0.5.0        compiler_4.0.3       systemfonts_1.0.1   
## [58] rlang_0.4.11         grid_4.0.3           iterators_1.0.13    
## [61] rstudioapi_0.13      miniUI_0.1.1.1       rmarkdown_2.8       
## [64] gtable_0.3.0         ModelMetrics_1.2.2.2 codetools_0.2-16    
## [67] DBI_1.1.1            reshape2_1.4.4       R6_2.5.0            
## [70] lubridate_1.7.10     knitr_1.33           fastmap_1.1.0       
## [73] utf8_1.2.1           stringi_1.5.3        Rcpp_1.0.6          
## [76] vctrs_0.3.8          rpart_4.1-15         tidyselect_1.1.1    
## [79] xfun_0.22