In this code, we score two out-of-sample datasets using four models. The first dataset is a time-series text dataset with all paragraphs extracted from the RBA SMP Introduction from 1997 to 2020, and the second is a cross-sectional text dataset with paragraphs collected from various sources. The output files are saved in the data_output folder, and the results are discussed in Section 7 of the paper.
Load all libraries in this step. You may need to install some packages if this is your first time using them.
# Packages attached by this script, in the order they are attached.
# The duplicate "caret" entry of the original list has been dropped.
load.lib <- c(
  "caret", "tidyr", "dplyr", "randomForest", "ggplot2", "PRROC",
  "klaR", "questionr", "plyr", "stringr", "kableExtra"
)
# library() stops with an error when a package is not installed, whereas
# require() only returns FALSE and lets the script fail later on.
# invisible() keeps the list returned by lapply() out of the rendered output.
invisible(lapply(load.lib, library, character.only = TRUE))
# Pin `select` to dplyr's implementation so that a function of the same name
# from another attached package cannot shadow it.
select <- dplyr::select
Load the models that were built in the code ‘P3_Building_Models.Rmd’. This means you can run this step without running the previous steps.
# Load the four fitted models from disk.
# NOTE(review): the files carry an .rda extension but are read with readRDS(),
# so they are presumably single-object RDS files -- confirm.

# content models
model_eco_content <- readRDS(
  "./data_input/model/para_eco_content_final_model.rda"
)
model_noneco_content <- readRDS(
  "./data_input/model/para_noneco_content_final_model.rda"
)

# clarity models
model_eco_clarity <- readRDS(
  "./data_input/model/para_eco_clarity_final_model.rda"
)
model_noneco_clarity <- readRDS(
  "./data_input/model/para_noneco_clarity_final_model.rda"
)
In this section, we apply our models to score paragraphs extracted from the SMP introduction sections from 1997 to 2020. The output spreadsheet, titled ‘smp_prediction_results.csv’, is saved in the r_output folder, and the results are reported and discussed in Section 7.1 of the paper.
Import sample paragraphs with text features. A snapshot of the data is shown as:
## Import data from 1997 to 2019 Feb
smp_text_feature_part1 <- readRDS(
  "./data_input/smp_text_feature_final0717.rds"
)
## Import data 2019 Feb to 2019 Nov
smp_text_feature_2019 <- readRDS(
  "./data_input/smp_intro2019_text_feature.rds"
)
## Import data 2020 to 2021 Feb
smp_text_feature_2020 <- readRDS("./data_input/smp_2021_text_feature.rds")

# Update names to make column names consistent across the three datasets
# (plyr::rename takes a named vector of the form old name = new name).
smp_text_feature_part1 <- plyr::rename(
  smp_text_feature_part1,
  c(
    "start_word_pos" = "word_pos.word1",
    "second_word_pos" = "word_pos.word2",
    "third_word_pos" = "word_pos.word3"
  )
)

## Combine the three datasets together; rbind.fill() stacks the rows and
## fills columns that are missing from an input with NA.
smp_text_feature_data <- plyr::rbind.fill(
  smp_text_feature_part1,
  smp_text_feature_2019,
  smp_text_feature_2020
)

### Transpose the following 3 variables: word_pos.word1, word_pos.word2,
### word_pos.word3. For each one, count rows per (question_index, tag) and
### spread the tags into one count column per tag.
mydata <- smp_text_feature_data

## for the 1st word pos
# dplyr::count is namespaced because plyr, attached later, also exports count.
start_word_pos <- mydata %>%
  ungroup() %>%
  select(question_index, word_pos.word1) %>%
  group_by(question_index, word_pos.word1) %>%
  dplyr::count() %>%
  tidyr::spread(word_pos.word1, n)
### update the column names (every column after question_index)
colnames(start_word_pos)[2:ncol(start_word_pos)] <- paste(
  "word_1st",
  colnames(start_word_pos)[2:ncol(start_word_pos)],
  sep = "_"
)

## for the 2nd word pos
second_word_pos <- mydata %>%
  ungroup() %>%
  select(question_index, word_pos.word2) %>%
  group_by(question_index, word_pos.word2) %>%
  dplyr::count() %>%
  tidyr::spread(word_pos.word2, n)
### update the column names
colnames(second_word_pos)[2:ncol(second_word_pos)] <- paste(
  "word_2nd",
  colnames(second_word_pos)[2:ncol(second_word_pos)],
  sep = "_"
)

## for the 3rd word pos
third_word_pos <- mydata %>%
  ungroup() %>%
  select(question_index, word_pos.word3) %>%
  group_by(question_index, word_pos.word3) %>%
  dplyr::count() %>%
  tidyr::spread(word_pos.word3, n)
### update the column names
colnames(third_word_pos)[2:ncol(third_word_pos)] <- paste(
  "word_3rd",
  colnames(third_word_pos)[2:ncol(third_word_pos)],
  sep = "_"
)

## Join the transposed data back to the main table.
# NOTE(review): mydata2 (the table without the original word_pos columns and
# word_choose) is built here, but the join below starts from mydata, so those
# columns remain in smp_text_feature_final -- confirm this is intended.
mydata2 <- mydata %>%
  select(-word_pos.word1, -word_pos.word2, -word_pos.word3, -word_choose)

smp_text_feature_final <- left_join(
  mydata, start_word_pos,
  by = "question_index"
) %>%
  left_join(second_word_pos, by = "question_index") %>%
  left_join(third_word_pos, by = "question_index") %>%
  as.data.frame()

# Further data cleaning: replace every "$" in the column names with "ds"
# (for example PRP$ becomes PRPds).
names(smp_text_feature_final) <- gsub(
  x = names(smp_text_feature_final),
  pattern = '[$]',
  replacement = 'ds'
)

# Derived ratios: words per sentence and syllables per word.
smp_text_feature_final$word_per_sentence <-
  smp_text_feature_final$word_count_stats /
  smp_text_feature_final$sentence_count
smp_text_feature_final$sylls_per_word <-
  smp_text_feature_final$readability_stats.sylls /
  smp_text_feature_final$word_count_stats

smp_text_feature_final[is.na(smp_text_feature_final)] <- 0 # replace NA with 0s

## Change some column names to make the dataset names the same:
## word_1st_/word_2nd_/word_3rd_ prefixes become pos_word1_/2_/3_.
names(smp_text_feature_final) <- gsub(
  x = names(smp_text_feature_final),
  pattern = "word_1st_",
  replacement = "pos_word1_"
)
names(smp_text_feature_final) <- gsub(
  x = names(smp_text_feature_final),
  pattern = "word_2nd_",
  replacement = "pos_word2_"
)
names(smp_text_feature_final) <- gsub(
  x = names(smp_text_feature_final),
  pattern = "word_3rd_",
  replacement = "pos_word3_"
)

# Take a look at the data
smp_text_feature_final %>%
  head() %>%
  kbl() %>%
  kable_paper() %>%
  scroll_box(width = "100%", height = "200px")
index.x | para_index | year | month | month_no | section | paragraph | paragraph_clean | word_count_stats | sentence_count | readability_stats.sylls | readability_stats.polys | fk_grade_level | FRES_score | comma_count | punc_count | digit_count | question_index | index.y | CC | CD | DT | EX | IN | JJ | JJR | NN | NNS | RB | RBR | TO | VB | VBD | VBG | VBN | VBP | VBZ | WDT | MD | POS | PRP | RP | PRPds | -LRB- | -RRB- | NNP | RBS | WRB | JJS | WPds | WP | PDT | UH | NNPS | pos_prop_CC | pos_prop_CD | pos_prop_DT | pos_prop_EX | pos_prop_IN | pos_prop_JJ | pos_prop_JJR | pos_prop_NN | pos_prop_NNS | pos_prop_RB | pos_prop_RBR | pos_prop_TO | pos_prop_VB | pos_prop_VBD | pos_prop_VBG | pos_prop_VBN | pos_prop_VBP | pos_prop_VBZ | pos_prop_WDT | pos_prop_MD | pos_prop_POS | pos_prop_PRP | pos_prop_RP | pos_prop_PRPds | pos_prop_NNP | pos_prop_RBS | pos_prop_WRB | pos_prop_JJS | pos_prop_WPds | pos_prop_WP | pos_prop_PDT | pos_prop_UH | pos_prop_NNPS | para_rank | index | sent_1st_DT | sent_1st_IN | sent_1st_JJ | sent_1st_NN | sent_1st_NNS | sent_1st_TO | sent_1st_VB | sent_1st_VBG | sent_1st_VBP | sent_1st_PRP | sent_1st_RB | sent_1st_VBD | sent_1st_VBN | sent_1st_JJS | sent_1st_VBZ | sent_1st_CC | sent_1st_RBR | sent_1st_CD | sent_1st_PRPds | sent_1st_POS | sent_1st_WDT | sent_1st_WRB | sent_1st_MD | sent_1st_RBS | sent_1st_JJR | sent_1st_EX | sent_1st_RP | sent_1st_WP | sent_1st_NNP | sent_1st_-LRB- | sent_1st_-RRB- | sent_1st_PDT | sent_1st_prop_DT | sent_1st_prop_IN | sent_1st_prop_JJ | sent_1st_prop_NN | sent_1st_prop_NNS | sent_1st_prop_TO | sent_1st_prop_VB | sent_1st_prop_VBG | sent_1st_prop_VBP | sent_1st_prop_PRP | sent_1st_prop_RB | sent_1st_prop_VBD | sent_1st_prop_VBN | sent_1st_prop_JJS | sent_1st_prop_VBZ | sent_1st_prop_CC | sent_1st_prop_RBR | sent_1st_prop_CD | sent_1st_prop_PRPds | sent_1st_prop_POS | sent_1st_prop_WDT | sent_1st_prop_WRB | sent_1st_prop_MD | sent_1st_prop_RBS | sent_1st_prop_JJR | sent_1st_prop_EX | sent_1st_prop_RP | 
sent_1st_prop_WP | sent_1st_prop_NNP | sent_1st_prop_-LRB- | sent_1st_prop_-RRB- | sent_1st_prop_PDT | word_pos.word1 | word_pos.word2 | word_pos.word3 | sent1st_clue_Attitudinal | sent1st_clue_connective | sent1st_clue_Contrast | sent1st_clue_detail | sent1st_clue_emphasis | sent1st_clue_inference | sent1st_clue_reformulation | sent1st_clue_summary | sentlast_clue_Attitudinal | sentlast_clue_connective | sentlast_clue_Contrast | sentlast_clue_detail | sentlast_clue_emphasis | sentlast_clue_inference | sentlast_clue_reformulation | sentlast_clue_summary | sentmiddle_clue_Attitudinal | sentmiddle_clue_connective | sentmiddle_clue_Contrast | sentmiddle_clue_detail | sentmiddle_clue_emphasis | sentmiddle_clue_inference | sentmiddle_clue_reformulation | sentmiddle_clue_summary | sent_1st_word_, | sent_1st_word_CC | sent_1st_word_CD | sent_1st_word_DT | sent_1st_word_EX | sent_1st_word_FW | sent_1st_word_IN | sent_1st_word_JJ | sent_1st_word_JJR | sent_1st_word_JJS | sent_1st_word_MD | sent_1st_word_NN | sent_1st_word_NNS | sent_1st_word_PDT | sent_1st_word_PRP | sent_1st_word_PRPds | sent_1st_word_RB | sent_1st_word_RBR | sent_1st_word_RBS | sent_1st_word_RP | sent_1st_word_TO | sent_1st_word_VB | sent_1st_word_VBD | sent_1st_word_VBG | sent_1st_word_VBN | sent_1st_word_VBP | sent_1st_word_VBZ | sent_1st_word_WDT | sent_1st_word_WP | sent_1st_word_WRB | sent_1st_parse_ADJP | sent_1st_parse_NP | sent_1st_parse_PP | sent_1st_parse_S | sent_1st_parse_SBAR | sent_1st_parse_VP | sent_1st_parse_WHNP | sent_1st_parse_ADVP | sent_1st_parse_WHADVP | sent_1st_parse_WHPP | sent_1st_parse_SINV | sent_1st_parse_SQ | sent_1st_parse_SBARQ | sent_last_parse_ADJP | sent_last_parse_NP | sent_last_parse_PP | sent_last_parse_S | sent_last_parse_SBAR | sent_last_parse_VP | sent_last_parse_WHNP | sent_last_parse_ADVP | sent_last_parse_WHADVP | sent_last_parse_WHPP | sent_last_parse_SINV | sent_last_parse_SQ | sent_last_parse_SBARQ | ADJP | NP | PP | S | SBAR | VP | WHNP | ADVP | WHADVP | 
WHPP | SINV | SQ | SBARQ | word_choose | word_pos | X | rank | sent_1st_word_NNS_POS | pos_word1_CC | pos_word1_CD | pos_word1_DT | pos_word1_EX | pos_word1_IN | pos_word1_JJ | pos_word1_JJR | pos_word1_JJS | pos_word1_NN | pos_word1_NNS | pos_word1_PRP | pos_word1_RB | pos_word1_RBR | pos_word1_RBS | pos_word1_RP | pos_word1_VB | pos_word1_VBD | pos_word1_VBG | pos_word1_VBN | pos_word1_VBP | pos_word1_VBZ | pos_word2_CC | pos_word2_CD | pos_word2_DT | pos_word2_IN | pos_word2_JJ | pos_word2_JJR | pos_word2_JJS | pos_word2_MD | pos_word2_NN | pos_word2_NN_POS | pos_word2_NNS | pos_word2_PDT | pos_word2_PRP | pos_word2_PRPds | pos_word2_RB | pos_word2_RBR | pos_word2_RBS | pos_word2_RP | pos_word2_SYM | pos_word2_TO | pos_word2_VB | pos_word2_VBD | pos_word2_VBG | pos_word2_VBN | pos_word2_VBP | pos_word2_VBZ | pos_word3_CC | pos_word3_CD | pos_word3_DT | pos_word3_EX | pos_word3_IN | pos_word3_JJ | pos_word3_JJR | pos_word3_JJS | pos_word3_MD | pos_word3_NN | pos_word3_NNP | pos_word3_NNS | pos_word3_PRP | pos_word3_RB | pos_word3_RBS | pos_word3_RP | pos_word3_TO | pos_word3_VB | pos_word3_VBD | pos_word3_VBG | pos_word3_VBN | pos_word3_VBP | pos_word3_VBZ | pos_word3_WDT | pos_word3_WRB | word_per_sentence | sylls_per_word |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 1997_5 | 1997 | may | 5 | Introduction | The economy moved through a period of slower growth in 1996 during which inflationary pressures eased significantly. The March quarter CPI result confirms that underlying inflation has returned to an annual rate of close to 2 per cent, and the prospects of inflation remaining low in the near future appear to be good. With some surplus capacity existing there is scope for the economy to grow more quickly in 1997 without generating significant inflationary pressures, provided growth in labour costs is not excessive. | The economy moved through a period of slower growth in 1996 during which inflationary pressures eased significantly. The March quarter CPI result confirms that underlying inflation has returned to an annual rate of close to 2 per cent, and the prospects of inflation remaining low in the near future appear to be good. With some surplus capacity existing there is scope for the economy to grow more quickly in 1997 without generating significant inflationary pressures, provided growth in labour costs is not excessive. 
| 80 | 3 | 147 | 19 | 16.49250 | 24.31583 | 2 | 3 | 9 | 1997_5_1 | 1 | 1 | 3 | 8 | 1 | 14 | 9 | 1 | 18 | 4 | 3 | 1 | 4 | 2 | 2 | 4 | 2 | 1 | 4 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1.20 | 3.61 | 9.64 | 1.20 | 16.87 | 10.84 | 1.20 | 21.69 | 4.82 | 3.61 | 1.20 | 4.82 | 2.41 | 2.41 | 4.82 | 2.41 | 1.20 | 4.82 | 1.20 | 0.00 | 0.00 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 2 | 4 | 1 | 3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 11.76 | 23.53 | 5.88 | 17.65 | 5.88 | 0.00 | 0.00 | 0.00 | 0.00 | 0 | 5.88 | 11.76 | 0.00 | 0 | 0.00 | 0.00 | 0.00 | 5.88 | 0 | 0 | 5.88 | 0 | 0.00 | 0 | 5.88 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | DT | NN | VBD | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | 3 | 2 | 1 | 2 | 1 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 12 | 6 | 4 | 3 | 6 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 3 | 30 | 14 | 11 | 5 | 15 | 1 | 2 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 26.66667 | 1.837500 |
2 | 1997_5 | 1997 | may | 5 | Introduction | So far in 1997 there have already been some indications that the pace of growth is picking up, particularly in areas of construction investment, housing and consumer spending. Growth is being supported by several factors including the effect of the three policy easings in the second half of 1996, which brought cash rates down by 1½ percentage points. The impact of those cash rate reductions on home mortgage borrowers has been reinforced by a significant compression of intermediaries’ interest margins in that area. The lower interest rates now in place should be supportive of interest-sensitive areas of activity, particularly housing and non-residential construction, as well as helping household and business cash flows. Other factors favourable to growth at present include the strong US economy, moderately rising commodity prices, and an historically good level of business profitability in many industries. | So far in 1997 there have already been some indications that the pace of growth is picking up, particularly in areas of construction investment, housing and consumer spending. Growth is being supported by several factors including the effect of the three policy easings in the second half of 1996, which brought cash rates down by 1½ percentage points. The impact of those cash rate reductions on home mortgage borrowers has been reinforced by a significant compression of intermediaries’ interest margins in that area. The lower interest rates now in place should be supportive of interest-sensitive areas of activity, particularly housing and non-residential construction, as well as helping household and business cash flows. Other factors favourable to growth at present include the strong US economy, moderately rising commodity prices, and an historically good level of business profitability in many industries. 
| 137 | 5 | 257 | 34 | 17.23177 | 20.32181 | 7 | 8 | 9 | 1997_5_2 | 2 | 4 | 4 | 12 | 1 | 23 | 12 | 1 | 35 | 16 | 10 | 0 | 1 | 1 | 1 | 5 | 4 | 2 | 3 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.86 | 2.86 | 8.57 | 0.71 | 16.43 | 8.57 | 0.71 | 25.00 | 11.43 | 7.14 | 0.00 | 0.71 | 0.71 | 0.71 | 3.57 | 2.86 | 1.43 | 2.14 | 0.71 | 0.71 | 0.71 | 0.71 | 0.71 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | 2 | 5 | 0 | 7 | 2 | 0 | 0 | 1 | 1 | 0 | 4 | 0 | 1 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 7.14 | 17.86 | 0.00 | 25.00 | 7.14 | 0.00 | 0.00 | 3.57 | 3.57 | 0 | 14.29 | 0.00 | 3.57 | 0 | 3.57 | 3.57 | 0.00 | 3.57 | 0 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 3.57 | 3.57 | 0 | 0 | 0 | 0 | 0 | RB | RB | IN | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13 | 4 | 2 | 1 | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 2 | 13 | 4 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 61 | 23 | 8 | 2 | 15 | 1 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 27.40000 | 1.875912 |
3 | 1997_5 | 1997 | may | 5 | Introduction | Conditions in the manufacturing sector have been an exception to this generally firmer picture, with profitability under pressure and investment intentions declining. The pressure on profitability reflects a combination of rising wage costs and flat selling prices, which continue to be constrained in many parts of the manufacturing sector by strong international competition. With the exchange rate having risen in trade-weighted terms, pressures on competitiveness have intensified during the past year. Looking ahead, however, a number of areas of domestic manufacturing are likely to benefit from the expansion in housing and non-residential construction now under way. | Conditions in the manufacturing sector have been an exception to this generally firmer picture, with profitability under pressure and investment intentions declining. The pressure on profitability reflects a combination of rising wage costs and flat selling prices, which continue to be constrained in many parts of the manufacturing sector by strong international competition. With the exchange rate having risen in trade-weighted terms, pressures on competitiveness have intensified during the past year. Looking ahead, however, a number of areas of domestic manufacturing are likely to benefit from the expansion in housing and non-residential construction now under way. 
| 96 | 4 | 194 | 24 | 17.61583 | 11.51250 | 5 | 6 | 0 | 1997_5_3 | 3 | 3 | 0 | 10 | 0 | 17 | 9 | 1 | 25 | 8 | 4 | 0 | 3 | 2 | 0 | 4 | 4 | 4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3.12 | 0.00 | 10.42 | 0.00 | 17.71 | 9.38 | 1.04 | 26.04 | 8.33 | 4.17 | 0.00 | 3.12 | 2.08 | 0.00 | 4.17 | 4.17 | 4.17 | 1.04 | 1.04 | 0.00 | 0.00 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | 3 | 3 | 0 | 7 | 2 | 1 | 0 | 1 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 13.64 | 13.64 | 0.00 | 31.82 | 9.09 | 4.55 | 0.00 | 4.55 | 4.55 | 0 | 4.55 | 0.00 | 4.55 | 0 | 0.00 | 4.55 | 0.00 | 0.00 | 0 | 0 | 0.00 | 0 | 0.00 | 0 | 4.55 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | NNS | IN | DT | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9 | 4 | 1 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 12 | 5 | 3 | 0 | 4 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 1 | 40 | 18 | 9 | 1 | 16 | 1 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 24.00000 | 2.020833 |
4 | 1997_5 | 1997 | may | 5 | Introduction | Employment growth has lagged behind the overall pace of economic activity. Total employment has been growing at a rate of around 1 per cent in the past year, concentrated in part-time jobs. Nonetheless, the number of job vacancies has increased and employment can be expected to strengthen as the general pace of activity picks up over the course of the year. | Employment growth has lagged behind the overall pace of economic activity. Total employment has been growing at a rate of around 1 per cent in the past year, concentrated in part-time jobs. Nonetheless, the number of job vacancies has increased and employment can be expected to strengthen as the general pace of activity picks up over the course of the year. | 60 | 3 | 99 | 12 | 11.68000 | 46.94500 | 2 | 4 | 1 | 1997_5_4 | 4 | 1 | 1 | 7 | 0 | 13 | 6 | 0 | 15 | 2 | 1 | 0 | 1 | 2 | 1 | 1 | 4 | 0 | 4 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1.64 | 1.64 | 11.48 | 0.00 | 21.31 | 9.84 | 0.00 | 24.59 | 3.28 | 1.64 | 0.00 | 1.64 | 3.28 | 1.64 | 1.64 | 6.56 | 0.00 | 6.56 | 0.00 | 1.64 | 0.00 | 0.00 | 1.64 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 4 | 1 | 1 | 2 | 2 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9.09 | 18.18 | 18.18 | 36.36 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0 | 0.00 | 0.00 | 9.09 | 0 | 9.09 | 0.00 | 0.00 | 0.00 | 0 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | NN | NN | VBZ | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 4 | 2 | 1 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | 5 | 5 | 0 | 8 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 21 | 12 | 7 | 0 | 15 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 20.00000 | 1.650000 |
5 | 1997_5 | 1997 | may | 5 | Introduction | The favourable near-term outlook for inflation is being underpinned by continued help from the exchange rate in holding down import prices. Also helpful to the outlook is the result of the recent Safety-Net Review by the Australian Industrial Relations Commission, which delivered only moderate increases in award wages. However, other developments in labour costs are of more concern for the longer-term inflation outlook. Wage increases under enterprise bargaining continue to be in the 4 to 5 per cent range, figures which appear high in a climate of 2 per cent inflation and 8½ per cent unemployment. Aggregate wages data, which encompass workers on award wages, enterprise agreements and other bargaining arrangements, suggest that the overall pace of wages growth has picked up recently. These figures will need to be closely watched to assess the extent to which they represent a significant change in trend but, taken at face value, they suggest that wages growth is becoming uncomfortably high. | The favourable near-term outlook for inflation is being underpinned by continued help from the exchange rate in holding down import prices. Also helpful to the outlook is the result of the recent Safety-Net Review by the Australian Industrial Relations Commission, which delivered only moderate increases in award wages. However, other developments in labour costs are of more concern for the longer-term inflation outlook. Wage increases under enterprise bargaining continue to be in the 4 to 5 per cent range, figures which appear high in a climate of 2 per cent inflation and 8½ per cent unemployment. Aggregate wages data, which encompass workers on award wages, enterprise agreements and other bargaining arrangements, suggest that the overall pace of wages growth has picked up recently. 
These figures will need to be closely watched to assess the extent to which they represent a significant change in trend but, taken at face value, they suggest that wages growth is becoming uncomfortably high. | 155 | 6 | 276 | 33 | 15.49661 | 29.97159 | 8 | 9 | 4 | 1997_5_5 | 5 | 3 | 4 | 13 | 0 | 23 | 17 | 1 | 36 | 16 | 6 | 0 | 6 | 4 | 1 | 3 | 4 | 7 | 5 | 4 | 1 | 0 | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1.90 | 2.53 | 8.23 | 0.00 | 14.56 | 10.76 | 0.63 | 22.78 | 10.13 | 3.80 | 0.00 | 3.80 | 2.53 | 0.63 | 1.90 | 2.53 | 4.43 | 3.16 | 2.53 | 0.63 | 0.00 | 1.27 | 1.27 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5 | 1 | 2 | 4 | 3 | 6 | 1 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 9.52 | 19.05 | 14.29 | 28.57 | 4.76 | 0.00 | 0.00 | 9.52 | 0.00 | 0 | 0.00 | 0.00 | 4.76 | 0 | 4.76 | 0.00 | 0.00 | 0.00 | 0 | 0 | 0.00 | 0 | 0.00 | 0 | 0.00 | 0.00 | 4.76 | 0 | 0 | 0 | 0 | 0 | DT | JJ | IN | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 4 | 2 | 0 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 10 | 2 | 6 | 2 | 14 | 1 | 1 | 0 | 1 | 0 | 0 | 0 | 3 | 57 | 20 | 18 | 6 | 29 | 4 | 4 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 25.83333 | 1.780645 |
6 | 1997_5 | 1997 | may | 5 | Introduction | The capacity of the economy to grow faster while maintaining low inflation will depend importantly on the future behaviour of wages. A significant acceleration of aggregate wages in response to stronger economic growth would directly curtail job creation. It would also threaten faster inflation – to which monetary policy would have to respond – and thereby put at risk the potential for faster non-inflationary growth. Lessening of wages growth would, on the other hand, enable faster growth without risk of acceleration in inflation. | The capacity of the economy to grow faster while maintaining low inflation will depend importantly on the future behaviour of wages. A significant acceleration of aggregate wages in response to stronger economic growth would directly curtail job creation. It would also threaten faster inflation – to which monetary policy would have to respond – and thereby put at risk the potential for faster non-inflationary growth. Lessening of wages growth would, on the other hand, enable faster growth without risk of acceleration in inflation. 
| 83 | 4 | 156 | 21 | 14.68081 | 26.76652 | 2 | 7 | 0 | 1997_5_6 | 6 | 1 | 0 | 6 | 0 | 13 | 10 | 4 | 21 | 3 | 5 | 1 | 4 | 7 | 0 | 1 | 0 | 1 | 0 | 1 | 5 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1.19 | 0.00 | 7.14 | 0.00 | 15.48 | 11.90 | 4.76 | 25.00 | 3.57 | 5.95 | 1.19 | 4.76 | 8.33 | 0.00 | 1.19 | 0.00 | 1.19 | 0.00 | 1.19 | 5.95 | 0.00 | 1.19 | 0.00 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | 1 | 3 | 4 | 2 | 4 | 1 | 1 | 2 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 14.29 | 19.05 | 9.52 | 19.05 | 4.76 | 4.76 | 9.52 | 4.76 | 0.00 | 0 | 4.76 | 0.00 | 0.00 | 0 | 0.00 | 0.00 | 4.76 | 0.00 | 0 | 0 | 0.00 | 0 | 4.76 | 0 | 0.00 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | DT | NN | IN | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8 | 3 | 3 | 2 | 5 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 11 | 5 | 1 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 36 | 13 | 8 | 3 | 17 | 1 | 5 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 20.75000 | 1.879518 |
Score the time-series text data from the RBA SMP from 1997 to 2020 using two reasoning models: the economist reasoning model and the non-economist reasoning model. The prediction results are saved in two columns: eco_content and noneco_content. A snapshot of the output table is:
# Score every SMP paragraph with the two content models.
mydata <- smp_text_feature_final

# Prediction: class probabilities from each model, as data frames.
probsTest_model_eco <- predict(
  model_eco_content, mydata,
  type = "prob"
) %>%
  as.data.frame()
probsTest_model_noneco <- predict(
  model_noneco_content, mydata,
  type = "prob"
) %>%
  as.data.frame()

# Extract the prediction results and construct the final table.
probsTest_model_eco$question_index <- mydata$question_index

# Keep the `high` probability column from each model, keyed by question_index.
predict_result <- data.frame(
  question_index = mydata$question_index,
  eco_content = probsTest_model_eco$high,
  noneco_content = probsTest_model_noneco$high
)

# NOTE(review): the first cbind() argument is unnamed, so the result carries a
# column called `smp_text_feature_final$question_index` that duplicates
# `question_index`. It is kept because later code may rely on the column
# layout -- confirm before removing.
output_data_content <- base::cbind(
  smp_text_feature_final$question_index,
  predict_result,
  paragraph = smp_text_feature_final$paragraph,
  fk_grade_level = smp_text_feature_final$fk_grade_level,
  year = smp_text_feature_final$year,
  month = smp_text_feature_final$month,
  word_count_stats = smp_text_feature_final$word_count_stats,
  sentence_count = smp_text_feature_final$sentence_count,
  FRES_score = smp_text_feature_final$FRES_score
)
# write.csv(output_data_content, "smp_predict_result_content.csv") ##save results

# Show the first rows of the output table.
output_data_content %>%
  head() %>%
  kbl() %>%
  kable_paper() %>%
  scroll_box(width = "100%", height = "200px")
smp_text_feature_final$question_index | question_index | eco_content | noneco_content | paragraph | fk_grade_level | year | month | word_count_stats | sentence_count | FRES_score |
---|---|---|---|---|---|---|---|---|---|---|
1997_5_1 | 1997_5_1 | 0.34 | 0.4766667 | The economy moved through a period of slower growth in 1996 during which inflationary pressures eased significantly. The March quarter CPI result confirms that underlying inflation has returned to an annual rate of close to 2 per cent, and the prospects of inflation remaining low in the near future appear to be good. With some surplus capacity existing there is scope for the economy to grow more quickly in 1997 without generating significant inflationary pressures, provided growth in labour costs is not excessive. | 16.49250 | 1997 | may | 80 | 3 | 24.31583 |
1997_5_2 | 1997_5_2 | 0.29 | 0.3800000 | So far in 1997 there have already been some indications that the pace of growth is picking up, particularly in areas of construction investment, housing and consumer spending. Growth is being supported by several factors including the effect of the three policy easings in the second half of 1996, which brought cash rates down by 1½ percentage points. The impact of those cash rate reductions on home mortgage borrowers has been reinforced by a significant compression of intermediaries’ interest margins in that area. The lower interest rates now in place should be supportive of interest-sensitive areas of activity, particularly housing and non-residential construction, as well as helping household and business cash flows. Other factors favourable to growth at present include the strong US economy, moderately rising commodity prices, and an historically good level of business profitability in many industries. | 17.23177 | 1997 | may | 137 | 5 | 20.32181 |
1997_5_3 | 1997_5_3 | 0.40 | 0.2833333 | Conditions in the manufacturing sector have been an exception to this generally firmer picture, with profitability under pressure and investment intentions declining. The pressure on profitability reflects a combination of rising wage costs and flat selling prices, which continue to be constrained in many parts of the manufacturing sector by strong international competition. With the exchange rate having risen in trade-weighted terms, pressures on competitiveness have intensified during the past year. Looking ahead, however, a number of areas of domestic manufacturing are likely to benefit from the expansion in housing and non-residential construction now under way. | 17.61583 | 1997 | may | 96 | 4 | 11.51250 |
1997_5_4 | 1997_5_4 | 0.47 | 0.3666667 | Employment growth has lagged behind the overall pace of economic activity. Total employment has been growing at a rate of around 1 per cent in the past year, concentrated in part-time jobs. Nonetheless, the number of job vacancies has increased and employment can be expected to strengthen as the general pace of activity picks up over the course of the year. | 11.68000 | 1997 | may | 60 | 3 | 46.94500 |
1997_5_5 | 1997_5_5 | 0.63 | 0.7133333 | The favourable near-term outlook for inflation is being underpinned by continued help from the exchange rate in holding down import prices. Also helpful to the outlook is the result of the recent Safety-Net Review by the Australian Industrial Relations Commission, which delivered only moderate increases in award wages. However, other developments in labour costs are of more concern for the longer-term inflation outlook. Wage increases under enterprise bargaining continue to be in the 4 to 5 per cent range, figures which appear high in a climate of 2 per cent inflation and 8½ per cent unemployment. Aggregate wages data, which encompass workers on award wages, enterprise agreements and other bargaining arrangements, suggest that the overall pace of wages growth has picked up recently. These figures will need to be closely watched to assess the extent to which they represent a significant change in trend but, taken at face value, they suggest that wages growth is becoming uncomfortably high. | 15.49661 | 1997 | may | 155 | 6 | 29.97159 |
1997_5_6 | 1997_5_6 | 0.67 | 0.6933333 | The capacity of the economy to grow faster while maintaining low inflation will depend importantly on the future behaviour of wages. A significant acceleration of aggregate wages in response to stronger economic growth would directly curtail job creation. It would also threaten faster inflation – to which monetary policy would have to respond – and thereby put at risk the potential for faster non-inflationary growth. Lessening of wages growth would, on the other hand, enable faster growth without risk of acceleration in inflation. | 14.68081 | 1997 | may | 83 | 4 | 26.76652 |
To get an idea of whether the average effect of a particular variable is positive or negative, we rerun the models for the SMP sample paragraphs after removing the top five variables one at a time (each variable is set to zero in turn). Based on the difference between the new results and the original ones, we classify the partial effect of a variable as positive or negative. The results are discussed in Section 6.3 of the paper.
Regenerate model predictions using the two economist reasoning (content) models after removing the top 5 variables one by one. After that, we save the results as one table, which is mapped back to the original prediction results (in columns eco_content and noneco_content). A snapshot of the output is:
# Drop-one-variable check for the economist content model.
# For each top variable, copy `mydata`, set that one column to 0, and re-score
# the copy with `model_eco_content`. The "high" class probabilities are then
# collected next to the original predictions in `predict_result`.

# Top variables of each model.
eco_content_var_list <- c(
  "VB", "pos_prop_VB", "pos_prop_NN", "digit_count", "pos_prop_MD"
)
# NOTE(review): the fifth entry used to be a second "pos_prop_VB". The
# eco-clarity drop-variable code later in this file zeroes pos_prop_RB as its
# fifth variable, so the list is aligned with that -- confirm against the paper.
eco_clarity_var_list <- c(
  "NN", "pos_prop_VB", "pos_prop_CC", "pos_prop_VBP", "pos_prop_RB"
)
noneco_clarity_var_list <- c(
  "NP", "pos_prop_JJ", "pos_prop_DT", "readability_stats.sylls",
  "fk_grade_level"
)

## ECO-CONTENT
# 1: VB set to 0
var_test_data <- mydata
var_test_data$VB <- 0
probsTest_model_eco_VB <-
  predict(model_eco_content, var_test_data, type = "prob") %>%
  as.data.frame()

# 2: pos_prop_VB set to 0
var_test_data <- mydata
var_test_data$pos_prop_VB <- 0
probsTest_model_eco_propVB <-
  predict(model_eco_content, var_test_data, type = "prob") %>%
  as.data.frame()

# 3: pos_prop_NN set to 0
var_test_data <- mydata
var_test_data$pos_prop_NN <- 0
probsTest_model_eco_propNN <-
  predict(model_eco_content, var_test_data, type = "prob") %>%
  as.data.frame()

# 4: pos_prop_MD set to 0
var_test_data <- mydata
var_test_data$pos_prop_MD <- 0
probsTest_model_eco_propMD <-
  predict(model_eco_content, var_test_data, type = "prob") %>%
  as.data.frame()

# 5: digit_count set to 0
var_test_data <- mydata
var_test_data$digit_count <- 0
probsTest_model_eco_digit <-
  predict(model_eco_content, var_test_data, type = "prob") %>%
  as.data.frame()

# results for economist_content model: one "high" probability column per
# zeroed variable, keyed by question_index
predict_result_drop_variable <- data.frame(
  question_index = mydata$question_index,
  drop_VB = probsTest_model_eco_VB$high,
  drop_propVB = probsTest_model_eco_propVB$high,
  drop_propNN = probsTest_model_eco_propNN$high,
  drop_MD = probsTest_model_eco_propMD$high,
  drop_digit = probsTest_model_eco_digit$high
)

# cbind() pastes the tables side by side, so question_index appears twice.
# NOTE(review): this assumes predict_result has the same row order as mydata
# -- confirm where predict_result is built.
result_compare_eco_contnet_var_drop <-
  cbind(predict_result_drop_variable, predict_result)

# take a look at the first rows
result_compare_eco_contnet_var_drop %>%
  head() %>%
  kbl() %>%
  kable_paper() %>%
  scroll_box(width = "100%", height = "200px")
question_index | drop_VB | drop_propVB | drop_propNN | drop_MD | drop_digit | question_index | eco_content | noneco_content |
---|---|---|---|---|---|---|---|---|
1997_5_1 | 0.29 | 0.27 | 0.38 | 0.34 | 0.48 | 1997_5_1 | 0.34 | 0.4766667 |
1997_5_2 | 0.29 | 0.29 | 0.33 | 0.23 | 0.40 | 1997_5_2 | 0.29 | 0.3800000 |
1997_5_3 | 0.34 | 0.33 | 0.52 | 0.40 | 0.40 | 1997_5_3 | 0.40 | 0.2833333 |
1997_5_4 | 0.47 | 0.36 | 0.55 | 0.39 | 0.48 | 1997_5_4 | 0.47 | 0.3666667 |
1997_5_5 | 0.63 | 0.57 | 0.70 | 0.55 | 0.66 | 1997_5_5 | 0.63 | 0.7133333 |
1997_5_6 | 0.66 | 0.58 | 0.77 | 0.60 | 0.67 | 1997_5_6 | 0.67 | 0.6933333 |
Regenerate model predictions using the non-economist reasoning (content) model after removing its top 5 variables one at a time. After that, we save the results as one table, which is mapped back to the original prediction results. A snapshot of the output is:
## NONECO-CONTENT / model_noneco_content
# Drop-one-variable check for the non-economist content model.
# For each top variable, copy `mydata`, set that one column to 0, and re-score
# the copy with `model_noneco_content`. The "high" class probabilities are
# appended to the economist-content comparison table.

# First entry was misspelled "pos_prob_VB"; the column zeroed below is
# pos_prop_VB.
noneco_content_var_list <- c(
  "pos_prop_VB", "pos_prop_MD", "pos_prop_JJ", "pos_prop_IN", "pos_prop_NN"
)

# 1: pos_prop_VB set to 0
var_test_data <- mydata
var_test_data$pos_prop_VB <- 0
probsTest_model_noneco_propVB <-
  predict(model_noneco_content, var_test_data, type = "prob") %>%
  as.data.frame()

# 2: pos_prop_MD set to 0
var_test_data <- mydata
var_test_data$pos_prop_MD <- 0
probsTest_model_noneco_propMD <-
  predict(model_noneco_content, var_test_data, type = "prob") %>%
  as.data.frame()

# 3: pos_prop_JJ set to 0
var_test_data <- mydata
var_test_data$pos_prop_JJ <- 0
probsTest_model_noneco_propJJ <-
  predict(model_noneco_content, var_test_data, type = "prob") %>%
  as.data.frame()

# 4: pos_prop_IN set to 0
var_test_data <- mydata
var_test_data$pos_prop_IN <- 0
probsTest_model_noneco_propIN <-
  predict(model_noneco_content, var_test_data, type = "prob") %>%
  as.data.frame()

# 5: pos_prop_NN set to 0
var_test_data <- mydata
var_test_data$pos_prop_NN <- 0
probsTest_model_noneco_propNN <-
  predict(model_noneco_content, var_test_data, type = "prob") %>%
  as.data.frame()

# results for noneconomist_content model
noneco_predict_result_drop_variable <- data.frame(
  question_index = mydata$question_index,
  noneco_con_drop_propVB = probsTest_model_noneco_propVB$high,
  noneco_con_drop_propMD = probsTest_model_noneco_propMD$high,
  noneco_con_drop_propJJ = probsTest_model_noneco_propJJ$high,
  noneco_con_drop_propIN = probsTest_model_noneco_propIN$high,
  noneco_con_drop_propNN = probsTest_model_noneco_propNN$high
)

# Side-by-side bind with the economist-content table; question_index is
# repeated once per bound table.
result_compare_var_drop_content <- cbind(
  result_compare_eco_contnet_var_drop,
  noneco_predict_result_drop_variable
)
# write.csv(result_compare_var_drop_content , "result_compare_var_drop_content.csv")

# take a look at the first rows
result_compare_var_drop_content %>%
  head() %>%
  kbl() %>%
  kable_paper() %>%
  scroll_box(width = "100%", height = "200px")
question_index | drop_VB | drop_propVB | drop_propNN | drop_MD | drop_digit | question_index | eco_content | noneco_content | question_index | noneco_con_drop_propVB | noneco_con_drop_propMD | noneco_con_drop_propJJ | noneco_con_drop_propIN | noneco_con_drop_propNN |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1997_5_1 | 0.29 | 0.27 | 0.38 | 0.34 | 0.48 | 1997_5_1 | 0.34 | 0.4766667 | 1997_5_1 | 0.4533333 | 0.4766667 | 0.4466667 | 0.5166667 | 0.5100000 |
1997_5_2 | 0.29 | 0.29 | 0.33 | 0.23 | 0.40 | 1997_5_2 | 0.29 | 0.3800000 | 1997_5_2 | 0.3833333 | 0.2466667 | 0.3200000 | 0.4366667 | 0.4433333 |
1997_5_3 | 0.34 | 0.33 | 0.52 | 0.40 | 0.40 | 1997_5_3 | 0.40 | 0.2833333 | 1997_5_3 | 0.2600000 | 0.2833333 | 0.2600000 | 0.3500000 | 0.4333333 |
1997_5_4 | 0.47 | 0.36 | 0.55 | 0.39 | 0.48 | 1997_5_4 | 0.47 | 0.3666667 | 1997_5_4 | 0.2800000 | 0.3033333 | 0.3266667 | 0.4400000 | 0.3766667 |
1997_5_5 | 0.63 | 0.57 | 0.70 | 0.55 | 0.66 | 1997_5_5 | 0.63 | 0.7133333 | 1997_5_5 | 0.6400000 | 0.5466667 | 0.5800000 | 0.7300000 | 0.7166667 |
1997_5_6 | 0.66 | 0.58 | 0.77 | 0.60 | 0.67 | 1997_5_6 | 0.67 | 0.6933333 | 1997_5_6 | 0.6400000 | 0.6033333 | 0.4666667 | 0.6766667 | 0.7233333 |
Score SMP paragraphs using the two readability models: the economist readability model and the non-economist readability model. The prediction results are saved in the columns eco_clarity and noneco_clarity respectively. After that, we join the clarity output table with the content output table to form the final output table. A snapshot of this table is:
# Score the SMP paragraphs in `mydata` with both clarity (readability) models
# and join the "high" class probabilities onto the content results.

## eco-clarity model
probsTest_model_eco_clarity <-
  predict(model_eco_clarity, mydata, type = "prob") %>%
  as.data.frame()

# add those variables to the dataset as they are not included in the
# out-of-sample dataset but are included in the model dataset
# (added after the eco-clarity prediction above and before the noneco-clarity
# prediction below; mydata keeps these columns afterwards)
mydata$FW <- 0
mydata$pos_prop_FW <- 0
mydata$pos_word1_TO <- 0
mydata$pos_word2_EX <- 0

## noneco-clarity model
probsTest_model_noneco_clarity <-
  predict(model_noneco_clarity, mydata, type = "prob") %>%
  as.data.frame()

## extract prediction result for the clarity models
predict_result_clarity <- data.frame(
  question_index = mydata$question_index,
  eco_clarity = probsTest_model_eco_clarity$high,
  noneco_clarity = probsTest_model_noneco_clarity$high
)

# final output with content and clarity prediction results
prediction_smp <- left_join(
  predict_result_clarity,
  output_data_content,
  by = "question_index"
)

## take a look at the results
prediction_smp %>%
  head() %>%
  kbl() %>%
  kable_paper() %>%
  scroll_box(width = "100%", height = "200px")
question_index | eco_clarity | noneco_clarity | smp_text_feature_final$question_index | eco_content | noneco_content | paragraph | fk_grade_level | year | month | word_count_stats | sentence_count | FRES_score |
---|---|---|---|---|---|---|---|---|---|---|---|---|
1997_5_1 | 0.48 | 0.500 | 1997_5_1 | 0.34 | 0.4766667 | The economy moved through a period of slower growth in 1996 during which inflationary pressures eased significantly. The March quarter CPI result confirms that underlying inflation has returned to an annual rate of close to 2 per cent, and the prospects of inflation remaining low in the near future appear to be good. With some surplus capacity existing there is scope for the economy to grow more quickly in 1997 without generating significant inflationary pressures, provided growth in labour costs is not excessive. | 16.49250 | 1997 | may | 80 | 3 | 24.31583 |
1997_5_2 | 0.37 | 0.444 | 1997_5_2 | 0.29 | 0.3800000 | So far in 1997 there have already been some indications that the pace of growth is picking up, particularly in areas of construction investment, housing and consumer spending. Growth is being supported by several factors including the effect of the three policy easings in the second half of 1996, which brought cash rates down by 1½ percentage points. The impact of those cash rate reductions on home mortgage borrowers has been reinforced by a significant compression of intermediaries’ interest margins in that area. The lower interest rates now in place should be supportive of interest-sensitive areas of activity, particularly housing and non-residential construction, as well as helping household and business cash flows. Other factors favourable to growth at present include the strong US economy, moderately rising commodity prices, and an historically good level of business profitability in many industries. | 17.23177 | 1997 | may | 137 | 5 | 20.32181 |
1997_5_3 | 0.78 | 0.596 | 1997_5_3 | 0.40 | 0.2833333 | Conditions in the manufacturing sector have been an exception to this generally firmer picture, with profitability under pressure and investment intentions declining. The pressure on profitability reflects a combination of rising wage costs and flat selling prices, which continue to be constrained in many parts of the manufacturing sector by strong international competition. With the exchange rate having risen in trade-weighted terms, pressures on competitiveness have intensified during the past year. Looking ahead, however, a number of areas of domestic manufacturing are likely to benefit from the expansion in housing and non-residential construction now under way. | 17.61583 | 1997 | may | 96 | 4 | 11.51250 |
1997_5_4 | 0.49 | 0.516 | 1997_5_4 | 0.47 | 0.3666667 | Employment growth has lagged behind the overall pace of economic activity. Total employment has been growing at a rate of around 1 per cent in the past year, concentrated in part-time jobs. Nonetheless, the number of job vacancies has increased and employment can be expected to strengthen as the general pace of activity picks up over the course of the year. | 11.68000 | 1997 | may | 60 | 3 | 46.94500 |
1997_5_5 | 0.52 | 0.260 | 1997_5_5 | 0.63 | 0.7133333 | The favourable near-term outlook for inflation is being underpinned by continued help from the exchange rate in holding down import prices. Also helpful to the outlook is the result of the recent Safety-Net Review by the Australian Industrial Relations Commission, which delivered only moderate increases in award wages. However, other developments in labour costs are of more concern for the longer-term inflation outlook. Wage increases under enterprise bargaining continue to be in the 4 to 5 per cent range, figures which appear high in a climate of 2 per cent inflation and 8½ per cent unemployment. Aggregate wages data, which encompass workers on award wages, enterprise agreements and other bargaining arrangements, suggest that the overall pace of wages growth has picked up recently. These figures will need to be closely watched to assess the extent to which they represent a significant change in trend but, taken at face value, they suggest that wages growth is becoming uncomfortably high. | 15.49661 | 1997 | may | 155 | 6 | 29.97159 |
1997_5_6 | 0.27 | 0.320 | 1997_5_6 | 0.67 | 0.6933333 | The capacity of the economy to grow faster while maintaining low inflation will depend importantly on the future behaviour of wages. A significant acceleration of aggregate wages in response to stronger economic growth would directly curtail job creation. It would also threaten faster inflation – to which monetary policy would have to respond – and thereby put at risk the potential for faster non-inflationary growth. Lessening of wages growth would, on the other hand, enable faster growth without risk of acceleration in inflation. | 14.68081 | 1997 | may | 83 | 4 | 26.76652 |
## export results
# write.csv(prediction_smp, "smp_prediction_results.csv")
In this section, we explore how predictions change for the two readability models after removing the top 5 features one at a time.
Regenerate model predictions using the economist readability (clarity) model after removing its top 5 variables one at a time. A snapshot of the output is:
### drop variables to see how prediction results change
# Drop-one-variable check for the economist clarity model.
# For each top variable, copy `mydata`, set that one column to 0, and re-score
# the copy with `model_eco_clarity`. The "high" class probabilities are placed
# next to the original clarity predictions.

# 1: NN set to 0
clarity_var_data <- mydata
clarity_var_data$NN <- 0
drop_var_eco_clarity_NN <-
  predict(model_eco_clarity, clarity_var_data, type = "prob") %>%
  as.data.frame()

# 2: pos_prop_VB set to 0
clarity_var_data <- mydata
clarity_var_data$pos_prop_VB <- 0
drop_var_eco_clarity_propVB <-
  predict(model_eco_clarity, clarity_var_data, type = "prob") %>%
  as.data.frame()

# 3: pos_prop_CC set to 0
clarity_var_data <- mydata
clarity_var_data$pos_prop_CC <- 0
drop_var_eco_clarity_propCC <-
  predict(model_eco_clarity, clarity_var_data, type = "prob") %>%
  as.data.frame()

# 4: pos_prop_VBP set to 0
clarity_var_data <- mydata
clarity_var_data$pos_prop_VBP <- 0
drop_var_eco_clarity_propVBP <-
  predict(model_eco_clarity, clarity_var_data, type = "prob") %>%
  as.data.frame()

# 5: pos_prop_RB set to 0
clarity_var_data <- mydata
clarity_var_data$pos_prop_RB <- 0
drop_var_eco_clarity_propRB <-
  predict(model_eco_clarity, clarity_var_data, type = "prob") %>%
  as.data.frame()

# one "high" probability column per zeroed variable, keyed by question_index
drop_var_eco_results <- data.frame(
  question_index = mydata$question_index,
  var_eco_clarity_NN = drop_var_eco_clarity_NN$high,
  var_eco_clarity_propVB = drop_var_eco_clarity_propVB$high,
  var_eco_clarity_propCC = drop_var_eco_clarity_propCC$high,
  var_eco_clarity_propVBP = drop_var_eco_clarity_propVBP$high,
  var_eco_clarity_propRB = drop_var_eco_clarity_propRB$high
)

# Side-by-side bind with the original clarity predictions; question_index
# appears in both tables and is therefore repeated.
var_drop_result_eco_clarity <-
  cbind(predict_result_clarity, drop_var_eco_results)

# Take a look at the data
var_drop_result_eco_clarity %>%
  head() %>%
  kbl() %>%
  kable_paper() %>%
  scroll_box(width = "100%", height = "200px")
question_index | eco_clarity | noneco_clarity | question_index | var_eco_clarity_NN | var_eco_clarity_propVB | var_eco_clarity_propCC | var_eco_clarity_propVBP | var_eco_clarity_propRB |
---|---|---|---|---|---|---|---|---|
1997_5_1 | 0.48 | 0.500 | 1997_5_1 | 0.51 | 0.46 | 0.40 | 0.51 | 0.60 |
1997_5_2 | 0.37 | 0.444 | 1997_5_2 | 0.42 | 0.36 | 0.26 | 0.42 | 0.55 |
1997_5_3 | 0.78 | 0.596 | 1997_5_3 | 0.73 | 0.72 | 0.54 | 0.75 | 0.80 |
1997_5_4 | 0.49 | 0.516 | 1997_5_4 | 0.53 | 0.52 | 0.38 | 0.49 | 0.59 |
1997_5_5 | 0.52 | 0.260 | 1997_5_5 | 0.53 | 0.48 | 0.38 | 0.44 | 0.60 |
1997_5_6 | 0.27 | 0.320 | 1997_5_6 | 0.26 | 0.38 | 0.28 | 0.25 | 0.39 |
## export results
#save the data
#write.csv(var_drop_result_eco_clarity, "drop_var_eco_clarity_results.csv")
Regenerate model predictions using the non-economist readability (clarity) model after removing its top 5 variables one at a time. A snapshot of the output is:
# Drop-one-variable check for the non-economist clarity model.
# For each top variable, copy `mydata`, set that one column to 0, and re-score
# the copy with `model_noneco_clarity`. The "high" class probabilities are
# placed next to the original clarity predictions.

# 1: NP set to 0
clarity_var_data <- mydata
clarity_var_data$NP <- 0
drop_var_noneco_clarity_NP <-
  predict(model_noneco_clarity, clarity_var_data, type = "prob") %>%
  as.data.frame()

# 2: pos_prop_JJ set to 0
clarity_var_data <- mydata
clarity_var_data$pos_prop_JJ <- 0
drop_var_noneco_clarity_propJJ <-
  predict(model_noneco_clarity, clarity_var_data, type = "prob") %>%
  as.data.frame()

# 3: pos_prop_DT set to 0
clarity_var_data <- mydata
clarity_var_data$pos_prop_DT <- 0
drop_var_noneco_clarity_propDT <-
  predict(model_noneco_clarity, clarity_var_data, type = "prob") %>%
  as.data.frame()

# 4: readability_stats.sylls set to 0
clarity_var_data <- mydata
clarity_var_data$readability_stats.sylls <- 0
drop_var_noneco_clarity_sylls <-
  predict(model_noneco_clarity, clarity_var_data, type = "prob") %>%
  as.data.frame()

# 5: fk_grade_level set to 0
clarity_var_data <- mydata
clarity_var_data$fk_grade_level <- 0
drop_var_noneco_clarity_fk <-
  predict(model_noneco_clarity, clarity_var_data, type = "prob") %>%
  as.data.frame()

# one "high" probability column per zeroed variable, keyed by question_index
drop_var_noneco_results <- data.frame(
  question_index = mydata$question_index,
  var_noneco_clarity_NP = drop_var_noneco_clarity_NP$high,
  var_noneco_clarity_propJJ = drop_var_noneco_clarity_propJJ$high,
  var_noneco_clarity_propDT = drop_var_noneco_clarity_propDT$high,
  var_noneco_clarity_sylls = drop_var_noneco_clarity_sylls$high,
  var_noneco_clarity_fk = drop_var_noneco_clarity_fk$high
)

# Original (no variable zeroed) clarity predictions, rebuilt here from the
# stored model outputs so this chunk does not rely on an earlier copy.
predict_result_clarity <- data.frame(
  question_index = mydata$question_index,
  eco_clarity = probsTest_model_eco_clarity$high,
  noneco_clarity = probsTest_model_noneco_clarity$high
)

# Side-by-side bind; question_index appears in both tables and is repeated.
var_drop_result <- cbind(predict_result_clarity, drop_var_noneco_results)

## take a look at the result
var_drop_result %>%
  head() %>%
  kbl() %>%
  kable_paper() %>%
  scroll_box(width = "100%", height = "200px")
question_index | eco_clarity | noneco_clarity | question_index | var_noneco_clarity_NP | var_noneco_clarity_propJJ | var_noneco_clarity_propDT | var_noneco_clarity_sylls | var_noneco_clarity_fk |
---|---|---|---|---|---|---|---|---|
1997_5_1 | 0.48 | 0.500 | 1997_5_1 | 0.488 | 0.536 | 0.492 | 0.480 | 0.492 |
1997_5_2 | 0.37 | 0.444 | 1997_5_2 | 0.464 | 0.468 | 0.456 | 0.488 | 0.476 |
1997_5_3 | 0.78 | 0.596 | 1997_5_3 | 0.596 | 0.592 | 0.572 | 0.596 | 0.592 |
1997_5_4 | 0.49 | 0.516 | 1997_5_4 | 0.540 | 0.540 | 0.500 | 0.504 | 0.472 |
1997_5_5 | 0.52 | 0.260 | 1997_5_5 | 0.308 | 0.256 | 0.284 | 0.324 | 0.300 |
1997_5_6 | 0.27 | 0.320 | 1997_5_6 | 0.396 | 0.360 | 0.344 | 0.356 | 0.340 |
## export results
##save the data
# write.csv(var_drop_result, "clarity_model_var_drop_result.csv")
Conduct a cross-sectional comparison by scoring sample paragraphs from various sources, including the Bank of England (BOE) Inflation Report Introduction and Boxes, RBA Speeches, articles from The Economist, and the RBA SMP Introduction and Boxes published in 2018 and 2019. The output is saved in the data output folder, and the results are discussed in Section 7.2 of the paper.
Import the sample paragraphs with all text features. A snapshot of the text data is shown below:
# Load the cross-sectional paragraph features, derive two ratio features, and
# rename columns before scoring.

# read data
cross_data <- readRDS("./data_input/cross_analysis_feature1_final.rds")

# clean data
# derived ratios: words per sentence and syllables per word
cross_data$word_per_sentence <-
  cross_data$word_count_stats / cross_data$sentence_count
cross_data$sylls_per_word <-
  cross_data$readability_stats.sylls / cross_data$word_count_stats
# replace NA with 0s (applies to every column, including the two ratios above)
cross_data[is.na(cross_data)] <- 0
# drop the helper index columns
cross_data <- cross_data %>% select(-index, -index.y)

# rename the word-position POS columns from word_1st_/word_2nd_/word_3rd_ to
# pos_word1_/pos_word2_/pos_word3_
names(cross_data) <- gsub(
  x = names(cross_data), pattern = "word_1st_", replacement = "pos_word1_"
)
names(cross_data) <- gsub(
  x = names(cross_data), pattern = "word_2nd_", replacement = "pos_word2_"
)
names(cross_data) <- gsub(
  x = names(cross_data), pattern = "word_3rd_", replacement = "pos_word3_"
)
# "$" in a column name (e.g. PRP$) becomes "ds" (PRPds)
names(cross_data) <- gsub(
  x = names(cross_data), pattern = '[$]', replacement = 'ds'
)

# take a look at the first rows
cross_data %>%
  head() %>%
  kbl() %>%
  kable_paper() %>%
  scroll_box(width = "100%", height = "200px")
text_source | autor | year | paragraph | question_index | paragraph_clean | index.x | word_count_stats | sentence_count | readability_stats.sylls | readability_stats.polys | fk_grade_level | FRES_score | comma_count | punc_count | digit_count | CC | CD | DT | IN | JJ | MD | NN | NNS | POS | PRP | PRPds | RB | TO | VB | VBG | VBN | VBP | VBZ | WDT | WRB | VBD | EX | WPds | JJR | RP | JJS | NNPS | RBR | RBS | PDT | FW | WP | NNP | UH | pos_prop_CC | pos_prop_CD | pos_prop_DT | pos_prop_IN | pos_prop_JJ | pos_prop_MD | pos_prop_NN | pos_prop_NNS | pos_prop_POS | pos_prop_PRP | pos_prop_PRPds | pos_prop_RB | pos_prop_TO | pos_prop_VB | pos_prop_VBG | pos_prop_VBN | pos_prop_VBP | pos_prop_VBZ | pos_prop_WDT | pos_prop_WRB | pos_prop_VBD | pos_prop_EX | pos_prop_WPds | pos_prop_JJR | pos_prop_RP | pos_prop_JJS | pos_prop_NNPS | pos_prop_RBR | pos_prop_RBS | pos_prop_PDT | pos_prop_FW | pos_prop_WP | pos_prop_NNP | pos_prop_UH | sent_1st_CC | sent_1st_DT | sent_1st_IN | sent_1st_JJ | sent_1st_NN | sent_1st_NNS | sent_1st_TO | sent_1st_VBD | sent_1st_VBG | sent_1st_CD | sent_1st_JJR | sent_1st_PRP | sent_1st_VB | sent_1st_VBZ | sent_1st_MD | sent_1st_RB | sent_1st_RBR | sent_1st_VBN | sent_1st_VBP | sent_1st_POS | sent_1st_PRPds | sent_1st_WDT | sent_1st_JJS | sent_1st_RP | sent_1st_WRB | sent_1st_EX | sent_1st_PDT | sent_1st_NNP | sent_1st_RBS | sent_1st_WP | sent_1st_FW | sent_1st_prop_CC | sent_1st_prop_DT | sent_1st_prop_IN | sent_1st_prop_JJ | sent_1st_prop_NN | sent_1st_prop_NNS | sent_1st_prop_TO | sent_1st_prop_VBD | sent_1st_prop_VBG | sent_1st_prop_CD | sent_1st_prop_JJR | sent_1st_prop_PRP | sent_1st_prop_VB | sent_1st_prop_VBZ | sent_1st_prop_MD | sent_1st_prop_RB | sent_1st_prop_RBR | sent_1st_prop_VBN | sent_1st_prop_VBP | sent_1st_prop_POS | sent_1st_prop_PRPds | sent_1st_prop_WDT | sent_1st_prop_JJS | sent_1st_prop_RP | sent_1st_prop_WRB | sent_1st_prop_EX | sent_1st_prop_PDT | sent_1st_prop_NNP | sent_1st_prop_RBS | sent_1st_prop_WP | sent_1st_prop_FW | 
start_word_pos | second_word_pos | third_word_pos | sent1st_clue_Attitudinal | sent1st_clue_connective | sent1st_clue_Contrast | sent1st_clue_detail | sent1st_clue_inference | sent1st_clue_reformulation | sent1st_clue_summary | sent1st_clue_transition | sentlast_clue_Attitudinal | sentlast_clue_connective | sentlast_clue_Contrast | sentlast_clue_detail | sentlast_clue_emphasis | sentlast_clue_inference | sentlast_clue_reformulation | sentlast_clue_summary | sentlast_clue_transition | sentmiddle_clue_Attitudinal | sentmiddle_clue_connective | sentmiddle_clue_Contrast | sentmiddle_clue_detail | sentmiddle_clue_emphasis | sentmiddle_clue_inference | sentmiddle_clue_reformulation | sentmiddle_clue_summary | sent_1st_word_ds | sent_1st_word_, | sent_1st_word_CC | sent_1st_word_CD | sent_1st_word_DT | sent_1st_word_EX | sent_1st_word_FW | sent_1st_word_IN | sent_1st_word_JJ | sent_1st_word_JJR | sent_1st_word_JJS | sent_1st_word_MD | sent_1st_word_NN | sent_1st_word_NNP | sent_1st_word_NNS | sent_1st_word_PDT | sent_1st_word_POS | sent_1st_word_PRP | sent_1st_word_PRPds | sent_1st_word_RB | sent_1st_word_RBR | sent_1st_word_RBS | sent_1st_word_RP | sent_1st_word_TO | sent_1st_word_VB | sent_1st_word_VBD | sent_1st_word_VBG | sent_1st_word_VBN | sent_1st_word_VBP | sent_1st_word_VBZ | sent_1st_word_WDT | sent_1st_word_WP | sent_1st_word_WPds | sent_1st_word_WRB | sent_1st_parse_ADJP | sent_1st_parse_ADVP | sent_1st_parse_NP | sent_1st_parse_PP | sent_1st_parse_S | sent_1st_parse_SBAR | sent_1st_parse_SINV | sent_1st_parse_VP | sent_1st_parse_WHADVP | sent_1st_parse_WHNP | sent_1st_parse_WHPP | sent_1st_parse_SBARQ | sent_1st_parse_SQ | sent_last_parse_ADJP | sent_last_parse_ADVP | sent_last_parse_NP | sent_last_parse_PP | sent_last_parse_S | sent_last_parse_SBAR | sent_last_parse_SINV | sent_last_parse_VP | sent_last_parse_WHADVP | sent_last_parse_WHNP | sent_last_parse_WHPP | sent_last_parse_SBARQ | sent_last_parse_SQ | ADJP | ADVP | NP | PP | S | SBAR | SINV | VP | 
WHADVP | WHNP | WHPP | SBARQ | SQ | pos_word1_, | pos_word1_0 | pos_word1_CC | pos_word1_CD | pos_word1_DT | pos_word1_EX | pos_word1_FW | pos_word1_IN | pos_word1_JJ | pos_word1_JJR | pos_word1_JJS | pos_word1_MD | pos_word1_NN | pos_word1_NNP | pos_word1_NNS | pos_word1_PDT | pos_word1_PRP | pos_word1_PRPds | pos_word1_RB | pos_word1_RBR | pos_word1_RBS | pos_word1_RP | pos_word1_TO | pos_word1_VB | pos_word1_VBD | pos_word1_VBG | pos_word1_VBN | pos_word1_VBP | pos_word1_VBZ | pos_word1_WDT | pos_word1_WP | pos_word1_WRB | pos_word2_CC | pos_word2_CD | pos_word2_DT | pos_word2_EX | pos_word2_FW | pos_word2_IN | pos_word2_JJ | pos_word2_JJR | pos_word2_JJS | pos_word2_LS | pos_word2_MD | pos_word2_NN | pos_word2_NNP | pos_word2_NNS | pos_word2_PRP | pos_word2_PRPds | pos_word2_RB | pos_word2_RBS | pos_word2_RP | pos_word2_TO | pos_word2_VB | pos_word2_VBD | pos_word2_VBG | pos_word2_VBN | pos_word2_VBP | pos_word2_VBZ | pos_word2_WDT | pos_word2_WP | pos_word2_WRB | pos_word2_<NA> | pos_word3_, | pos_word3_0 | pos_word3_CC | pos_word3_CD | pos_word3_DT | pos_word3_EX | pos_word3_FW | pos_word3_IN | pos_word3_JJ | pos_word3_JJR | pos_word3_JJS | pos_word3_MD | pos_word3_NN | pos_word3_NNP | pos_word3_NNS | pos_word3_PDT | pos_word3_PRP | pos_word3_PRPds | pos_word3_RB | pos_word3_RBR | pos_word3_RBS | pos_word3_RP | pos_word3_TO | pos_word3_VB | pos_word3_VBD | pos_word3_VBG | pos_word3_VBN | pos_word3_VBP | pos_word3_VBZ | pos_word3_WDT | pos_word3_WP | pos_word3_WRB | word_per_sentence | sylls_per_word |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
speech | Guy Debelle[*]Deputy Governor | 2018 | When reading through the Bank’s forecasts, I think it is useful to avoid false precision. An important question to ask is: are these revisions to the Bank’s outlook consequential for the monetary policy decision? Similarly you can ask, do I think these changes affect my own decisions about my household or my business? A tenth or two of a percentage point here or there on the outlook for GDP or inflation is unlikely to matter that much for any of those decisions. Often, these revisions reflect the new information that has come to hand over the previous quarter. This leads us to reassess the starting point for our forecasts of where the economy is then likely to go. In making this assessment, we ask whether the incoming data have been view-changing or view-validating. Over the previous three months , the data have generally been view-validating. | 2 | When reading through the Bank’s forecasts, I think it is useful to avoid false precision. An important question to ask is: are these revisions to the Bank’s outlook consequential for the monetary policy decision? Similarly you can ask, do I think these changes affect my own decisions about my household or my business? A tenth or two of a percentage point here or there on the outlook for GDP or inflation is unlikely to matter that much for any of those decisions. Often, these revisions reflect the new information that has come to hand over the previous quarter. This leads us to reassess the starting point for our forecasts of where the economy is then likely to go. In making this assessment, we ask whether the incoming data have been view-changing or view-validating. Over the previous three months , the data have generally been view-validating. 
| 1 | 144 | 8 | 233 | 25 | 10.523056 | 51.67750 | 5 | 14 | 0 | 5 | 2 | 21 | 15 | 15 | 1 | 21 | 10 | 2 | 6 | 4 | 6 | 7 | 9 | 3 | 3 | 7 | 6 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3.42 | 1.37 | 14.38 | 10.27 | 10.27 | 0.68 | 14.38 | 6.85 | 1.37 | 4.11 | 2.74 | 4.11 | 4.79 | 6.16 | 2.05 | 2.05 | 4.79 | 4.11 | 0.68 | 1.37 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 5 | 4 | 1 | 6 | 3 | 2 | 0 | 2 | 1 | 1 | 1 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3.33 | 16.67 | 13.33 | 3.33 | 20.00 | 10.00 | 6.67 | 0.00 | 6.67 | 3.33 | 3.33 | 3.33 | 6.67 | 3.33 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | DT | VBP | NN | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 3 | 1 | 2 | 1 | 0 | 4 | 0 | 0 | 0 | 0 | 0 | 6 | 6 | 86 | 32 | 21 | 6 | 0 | 28 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 18.00000 | 1.618056 |
speech | Guy Debelle[*]Deputy Governor | 2018 | As I just said, the forecasts we have just published are little changed from those we published in February. Table 1 shows the Bank’s outlook for GDP growth, the unemployment rate and inflation until June 2020. This is generally the time horizon that is relevant for the Board’s deliberations on monetary policy. | 4 | As I just said, the forecasts we have just published are little changed from those we published in February. Table 1 shows the Bank’s outlook for GDP growth, the unemployment rate and inflation until June 2020. This is generally the time horizon that is relevant for the Board’s deliberations on monetary policy. | 2 | 50 | 3 | 87 | 10 | 11.442000 | 42.71433 | 2 | 5 | 5 | 1 | 2 | 7 | 7 | 3 | 0 | 14 | 2 | 2 | 2 | 0 | 4 | 0 | 0 | 0 | 3 | 2 | 3 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1.85 | 3.70 | 12.96 | 12.96 | 5.56 | 0.00 | 25.93 | 3.70 | 3.70 | 3.70 | 0.00 | 7.41 | 0.00 | 0.00 | 0.00 | 5.56 | 3.70 | 5.56 | 1.85 | 0.00 | 1.85 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 2 | 3 | 3 | 4 | 3 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 2 | 1 | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3.85 | 7.69 | 11.54 | 11.54 | 15.38 | 11.54 | 3.85 | 0.00 | 0.00 | 0.00 | 3.85 | 0.00 | 3.85 | 3.85 | 0.00 | 7.69 | 3.85 | 3.85 | 7.69 | 0.00 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | JJ | JJ | DT | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9 | 3 | 3 | 1 | 0 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 1 | 1 | 0 | 0 | 2 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 25 | 11 | 9 | 3 | 0 | 19 | 0 | 2 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 16.66667 | 1.740000 |
speech | Guy Debelle[*]Deputy Governor | 2018 | Other developments in financial conditions can affect the forecasts too. One example is developments in money market rates. In the SMP, we document the recent rise in money market interest rates in the US, particularly LIBOR . There are a number of explanations for the rise, including a large increase in bill issuance by the US Treasury and the effect of various tax changes on investment decisions by CFOs at some US companies with large cash pools. This increases the wholesale funding costs for the Australian banks, as well as increasing the costs for borrowers whose lending rates are priced off BBSW, which includes many corporates. | 7 | Other developments in financial conditions can affect the forecasts too. One example is developments in money market rates. In the SMP, we document the recent rise in money market interest rates in the US, particularly LIBOR . There are a number of explanations for the rise, including a large increase in bill issuance by the US Treasury and the effect of various tax changes on investment decisions by CFOs at some US companies with large cash pools. This increases the wholesale funding costs for the Australian banks, as well as increasing the costs for borrowers whose lending rates are priced off BBSW, which includes many corporates. 
| 3 | 105 | 5 | 173 | 22 | 12.041905 | 46.13143 | 5 | 5 | 0 | 1 | 1 | 14 | 18 | 9 | 1 | 23 | 18 | 0 | 4 | 0 | 4 | 0 | 1 | 2 | 1 | 2 | 3 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.95 | 0.95 | 13.33 | 17.14 | 8.57 | 0.95 | 21.90 | 17.14 | 0.00 | 3.81 | 0.00 | 3.81 | 0.00 | 0.95 | 1.90 | 0.95 | 1.90 | 2.86 | 0.95 | 0.00 | 0.00 | 0.95 | 0.95 | 0.00 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 5 | 10 | 3 | 11 | 5 | 2 | 2 | 0 | 1 | 0 | 0 | 2 | 0 | 0 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.17 | 10.87 | 21.74 | 6.52 | 23.91 | 10.87 | 4.35 | 4.35 | 0.00 | 2.17 | 0.00 | 0.00 | 4.35 | 0.00 | 0.00 | 2.17 | 2.17 | 2.17 | 0.00 | 2.17 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NN | VB | TO | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 12 | 6 | 3 | 0 | 0 | 4 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 2 | 1 | 2 | 0 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 3 | 5 | 40 | 15 | 15 | 3 | 0 | 20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 21.00000 | 1.647619 |
speech | Guy Debelle[*]Deputy Governor | 2018 | However, the effect to date has not been that large in terms of the overall impact on bank funding costs. Thus far, it has not been a consequential development from a forecasting point of view. It is not clear how much of the rise in LIBOR is due to structural changes in money markets and how much is temporary. In the last couple of weeks, these money market rates have declined noticeably from their peaks. We will continue to monitor how this unfolds in the period ahead. | 8 | However, the effect to date has not been that large in terms of the overall impact on bank funding costs. Thus far, it has not been a consequential development from a forecasting point of view. It is not clear how much of the rise in LIBOR is due to structural changes in money markets and how much is temporary. In the last couple of weeks, these money market rates have declined noticeably from their peaks. We will continue to monitor how this unfolds in the period ahead. 
| 4 | 87 | 5 | 127 | 11 | 8.421287 | 65.67745 | 3 | 5 | 0 | 1 | 0 | 9 | 13 | 9 | 1 | 16 | 7 | 0 | 3 | 1 | 9 | 3 | 2 | 0 | 3 | 1 | 6 | 0 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1.15 | 0.00 | 10.34 | 14.94 | 10.34 | 1.15 | 18.39 | 8.05 | 0.00 | 3.45 | 1.15 | 10.34 | 3.45 | 2.30 | 0.00 | 3.45 | 1.15 | 6.90 | 0.00 | 3.45 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 3 | 1 | 5 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.26 | 5.26 | 15.79 | 5.26 | 26.32 | 10.53 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 10.53 | 0.00 | 5.26 | 0.00 | 0.00 | 5.26 | 0.00 | 0.00 | 5.26 | 5.26 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | IN | JJ | IN | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 12 | 4 | 2 | 1 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 18 | 7 | 3 | 2 | 0 | 4 | 0 | 0 | 0 | 0 | 0 | 7 | 4 | 69 | 28 | 18 | 6 | 0 | 25 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 17.40000 | 1.459770 |
speech | Guy Debelle[*]Deputy Governor | 2018 | The outlook for the economy in 2018 and 2019 is expected to be a little stronger than occurred in 2017. GDP growth is expected to pick up from around 2½ per cent currently to be around 3¼ over the next couple of years. | 9 | The outlook for the economy in 2018 and 2019 is expected to be a little stronger than occurred in 2017. GDP growth is expected to pick up from around 2½ per cent currently to be around 3¼ over the next couple of years. | 5 | 38 | 2 | 60 | 4 | 10.451579 | 53.97105 | 0 | 2 | 14 | 1 | 5 | 4 | 10 | 1 | 0 | 6 | 1 | 0 | 0 | 0 | 2 | 3 | 3 | 0 | 3 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2.33 | 11.63 | 9.30 | 23.26 | 2.33 | 0.00 | 13.95 | 2.33 | 0.00 | 0.00 | 0.00 | 4.65 | 6.98 | 6.98 | 0.00 | 6.98 | 0.00 | 4.65 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 2.33 | 2.33 | 0.00 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 3 | 3 | 5 | 12 | 1 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3.33 | 10.00 | 10.00 | 16.67 | 40.00 | 3.33 | 3.33 | 0.00 | 3.33 | 0.00 | 0.00 | 0.00 | 0.00 | 3.33 | 0.00 | 0.00 | 0.00 | 3.33 | 0.00 | 3.33 | 0.00 | 0.00 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | VBG | IN | CC | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 5 | 1 | 4 | 1 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | 3 | 1 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 8 | 2 | 7 | 2 | 0 | 10 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 
| 0 | 0 | 19.00000 | 1.578947 |
speech | Guy Debelle[*]Deputy Governor | 2018 | One part of the answer is that the global conjuncture is constructive. We have been witnessing the most synchronised pick-up in the global economy for quite some time. The US economy is doing well. Europe is doing better than it has been for the past decade. The Japanese economy has recorded its longest period of quarterly growth in almost three decades. The global conjuncture has been reflected in a pick-up in global industrial production and trade that is particularly beneficial to the east Asian region, which is leveraged to the global cycle. | 11 | One part of the answer is that the global conjuncture is constructive. We have been witnessing the most synchronised pick-up in the global economy for quite some time. The US economy is doing well. Europe is doing better than it has been for the past decade. The Japanese economy has recorded its longest period of quarterly growth in almost three decades. The global conjuncture has been reflected in a pick-up in global industrial production and trade that is particularly beneficial to the east Asian region, which is leveraged to the global cycle. 
| 6 | 92 | 6 | 161 | 20 | 11.040000 | 43.22167 | 1 | 8 | 0 | 1 | 2 | 12 | 10 | 14 | 0 | 18 | 1 | 0 | 3 | 1 | 4 | 2 | 0 | 3 | 6 | 1 | 9 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1.09 | 2.17 | 13.04 | 10.87 | 15.22 | 0.00 | 19.57 | 1.09 | 0.00 | 3.26 | 1.09 | 4.35 | 2.17 | 0.00 | 3.26 | 6.52 | 1.09 | 9.78 | 2.17 | 0.00 | 0.00 | 0.00 | 0.00 | 1.09 | 0.00 | 2.17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 3 | 1 | 4 | 2 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 5.88 | 5.88 | 17.65 | 5.88 | 23.53 | 11.76 | 0.00 | 0.00 | 0.00 | 0.00 | 5.88 | 0.00 | 0.00 | 5.88 | 0.00 | 5.88 | 0.00 | 0.00 | 0.00 | 5.88 | 5.88 | 0.00 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | DT | VBP | VBD | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | 5 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 3 | 1 | 19 | 7 | 5 | 2 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 5 | 3 | 57 | 23 | 15 | 2 | 0 | 15 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 15.33333 | 1.750000 |
Score the cross-sectional dataset using reasoning models. A snapshot of the table is shown below:
# Model scoring: apply the two content models to the cross-sectional data.
# `type = "prob"` requests class probabilities; only the `high` column of
# each result is used further down.
cross_content_model_eco <- predict(model_eco_content, cross_data, type = "prob") %>%
  as.data.frame()
cross_content_model_noneco <- predict(model_noneco_content, cross_data, type = "prob") %>%
  as.data.frame()

# Extract the prediction result: the `high` probability from each model,
# keyed by the question_index taken from cross_data.
cross_predict_result <- data.frame(
  question_index = cross_data$question_index,
  eco_content_cross = cross_content_model_eco$high,
  noneco_content_cross = cross_content_model_noneco$high
)

# Append the readability measures and source metadata from cross_data.
# cbind() matches by row position, so this relies on the predictions being
# in the same row order as cross_data.
cross_output_data_content <- base::cbind(
  cross_predict_result,
  fk_grade_level = cross_data$fk_grade_level,
  FRES_score = cross_data$FRES_score,
  text_source = cross_data$text_source,
  autor = cross_data$autor,
  year = cross_data$year
)
# write.csv(cross_output_data_content,"cross_output_data_content.csv")

# Take a look at the result (first rows, rendered as a scrollable table).
cross_output_data_content %>%
  head() %>%
  kbl() %>%
  kable_paper() %>%
  scroll_box(width = "100%", height = "200px")
question_index | eco_content_cross | noneco_content_cross | fk_grade_level | FRES_score | text_source | autor | year |
---|---|---|---|---|---|---|---|
2 | 0.85 | 0.8500000 | 10.523056 | 51.67750 | speech | Guy Debelle[*]Deputy Governor | 2018 |
4 | 0.29 | 0.2966667 | 11.442000 | 42.71433 | speech | Guy Debelle[*]Deputy Governor | 2018 |
7 | 0.67 | 0.4933333 | 12.041905 | 46.13143 | speech | Guy Debelle[*]Deputy Governor | 2018 |
8 | 0.74 | 0.6966667 | 8.421287 | 65.67745 | speech | Guy Debelle[*]Deputy Governor | 2018 |
9 | 0.36 | 0.3933333 | 10.451579 | 53.97105 | speech | Guy Debelle[*]Deputy Governor | 2018 |
11 | 0.38 | 0.5266667 | 11.040000 | 43.22167 | speech | Guy Debelle[*]Deputy Governor | 2018 |
Regenerate predictions using readability models. A snapshot of the table is shown below:
# Score the cross-sectional data with the two clarity (readability) models.
cross_clarity_model_eco <- predict(model_eco_clarity, cross_data, type = "prob") %>%
  as.data.frame()

# Set pos_word2_RBR to 0 for every row before scoring with the second model.
# NOTE(review): presumably model_noneco_clarity expects this predictor and it
# is absent from cross_data; confirm. Any existing values are overwritten.
cross_data$pos_word2_RBR <- 0
cross_clarity_model_noneco <- predict(model_noneco_clarity, cross_data, type = "prob") %>%
  as.data.frame()

# Extract the prediction result: the `high` probability from each clarity
# model, keyed by question_index.
cross_predict_result_clarity <- data.frame(
  question_index = cross_data$question_index,
  eco_clarity_cross = cross_clarity_model_eco$high,
  noneco_clarity_cross = cross_clarity_model_noneco$high
)

# Combine the content scores and the clarity scores into one table.
cross_score_result <- left_join(
  cross_output_data_content,
  cross_predict_result_clarity,
  by = "question_index"
)

# Take a look at the data (first rows, rendered as a scrollable table).
cross_score_result %>%
  head() %>%
  kbl() %>%
  kable_paper() %>%
  scroll_box(width = "100%", height = "200px")
question_index | eco_content_cross | noneco_content_cross | fk_grade_level | FRES_score | text_source | autor | year | eco_clarity_cross | noneco_clarity_cross |
---|---|---|---|---|---|---|---|---|---|
2 | 0.85 | 0.8500000 | 10.523056 | 51.67750 | speech | Guy Debelle[*]Deputy Governor | 2018 | 0.60 | 0.416 |
4 | 0.29 | 0.2966667 | 11.442000 | 42.71433 | speech | Guy Debelle[*]Deputy Governor | 2018 | 0.49 | 0.684 |
7 | 0.67 | 0.4933333 | 12.041905 | 46.13143 | speech | Guy Debelle[*]Deputy Governor | 2018 | 0.45 | 0.572 |
8 | 0.74 | 0.6966667 | 8.421287 | 65.67745 | speech | Guy Debelle[*]Deputy Governor | 2018 | 0.45 | 0.608 |
9 | 0.36 | 0.3933333 | 10.451579 | 53.97105 | speech | Guy Debelle[*]Deputy Governor | 2018 | 0.40 | 0.676 |
11 | 0.38 | 0.5266667 | 11.040000 | 43.22167 | speech | Guy Debelle[*]Deputy Governor | 2018 | 0.47 | 0.400 |
#save the output
# write.csv(cross_score_result,"cross_predict_result.csv")
Extract speech data to see how the scores change within a document. The results are discussed in Section 7.3 of the paper.
# Keep only the paragraphs that come from speeches.
speech_data <- cross_data %>% filter(text_source == "speech")
speech_data_short <- speech_data %>%
  select(question_index, index.x, text_source, year, autor, paragraph)

## Find the speech title for each speech and the order of paragraphs.
speech_source <- read.csv("./data_input/speech_full.csv")
speech_model_result <- cross_score_result %>% filter(text_source == "speech")

## Join the speech source data and the speech model results using question_index.
speech_scores <- left_join(speech_model_result, speech_source, by = "question_index")

library(stringr)
# Collapse repeated whitespace and trim both text columns so that they can be
# compared as plain strings.
speech_scores$paragraph <- str_squish(speech_scores$paragraph)
speech_scores$cross_data.paragraph <- str_squish(speech_scores$cross_data.paragraph)

# Flag, per row, whether the paragraph text from the speech source equals the
# paragraph text carried in cross_data.paragraph. If either side is NA the
# comparison (and therefore `check`) is NA rather than "no".
para_check <- speech_scores %>%
  select(question_index, paragraph, cross_data.paragraph) %>%
  mutate(
    check = ifelse(
      as.character(paragraph) == as.character(cross_data.paragraph),
      "yes",
      "no"
    )
  )
# ## check
# para_check %>% filter(check == "no") # the output is NA, which means that the source are correctly found for each paragraph

## Clean the data a little bit: drop the helper/duplicate columns and the rows
## without a paragraph. (The original call listed -cross_data.fk_grade_level
## twice; the duplicate is removed here, which does not change the result.)
speech_scores_clean <- speech_scores %>%
  select(
    -cross_data.question_index, -cross_data.paragraph,
    -para_first50_letters, -X.1,
    -cross_data.fk_grade_level, -year.y,
    -cross_data.text_source, -X
  ) %>%
  filter(!is.na(paragraph))

# Take a look at the cleaned speech scores.
speech_scores_clean %>%
  head() %>%
  kbl() %>%
  kable_paper() %>%
  scroll_box(width = "100%", height = "200px")
question_index | eco_content_cross | noneco_content_cross | fk_grade_level | FRES_score | text_source | autor | year.x | eco_clarity_cross | noneco_clarity_cross | index | cross_data.FRES_score | cross_data.autor | web_link | para_order | author | event | paragraph |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2 | 0.85 | 0.8500000 | 10.523056 | 51.67750 | speech | Guy Debelle[*]Deputy Governor | 2018 | 0.60 | 0.416 | 363 | 51.67750 | Guy Debelle[*]Deputy Governor | G:/Research/JoanH/20181022_SMP Project/data-paragraphs/speeches/-dg-2018-05-15-1.csv | 2 | Guy Debelle[*]Deputy Governor | Opening Keynote at the CFO Forum Sydney – 15 May 2018 | When reading through the Bank’s forecasts, I think it is useful to avoid false precision. An important question to ask is: are these revisions to the Bank’s outlook consequential for the monetary policy decision? Similarly you can ask, do I think these changes affect my own decisions about my household or my business? A tenth or two of a percentage point here or there on the outlook for GDP or inflation is unlikely to matter that much for any of those decisions. Often, these revisions reflect the new information that has come to hand over the previous quarter. This leads us to reassess the starting point for our forecasts of where the economy is then likely to go. In making this assessment, we ask whether the incoming data have been view-changing or view-validating. Over the previous three months , the data have generally been view-validating. |
4 | 0.29 | 0.2966667 | 11.442000 | 42.71433 | speech | Guy Debelle[*]Deputy Governor | 2018 | 0.49 | 0.684 | 364 | 42.71433 | Guy Debelle[*]Deputy Governor | G:/Research/JoanH/20181022_SMP Project/data-paragraphs/speeches/-dg-2018-05-15-1.csv | 4 | Guy Debelle[*]Deputy Governor | Opening Keynote at the CFO Forum Sydney – 15 May 2018 | As I just said, the forecasts we have just published are little changed from those we published in February. Table 1 shows the Bank’s outlook for GDP growth, the unemployment rate and inflation until June 2020. This is generally the time horizon that is relevant for the Board’s deliberations on monetary policy. |
7 | 0.67 | 0.4933333 | 12.041905 | 46.13143 | speech | Guy Debelle[*]Deputy Governor | 2018 | 0.45 | 0.572 | 365 | 46.13143 | Guy Debelle[*]Deputy Governor | G:/Research/JoanH/20181022_SMP Project/data-paragraphs/speeches/-dg-2018-05-15-1.csv | 7 | Guy Debelle[*]Deputy Governor | Opening Keynote at the CFO Forum Sydney – 15 May 2018 | Other developments in financial conditions can affect the forecasts too. One example is developments in money market rates. In the SMP, we document the recent rise in money market interest rates in the US, particularly LIBOR . There are a number of explanations for the rise, including a large increase in bill issuance by the US Treasury and the effect of various tax changes on investment decisions by CFOs at some US companies with large cash pools. This increases the wholesale funding costs for the Australian banks, as well as increasing the costs for borrowers whose lending rates are priced off BBSW, which includes many corporates. |
8 | 0.74 | 0.6966667 | 8.421287 | 65.67745 | speech | Guy Debelle[*]Deputy Governor | 2018 | 0.45 | 0.608 | 366 | 65.67745 | Guy Debelle[*]Deputy Governor | G:/Research/JoanH/20181022_SMP Project/data-paragraphs/speeches/-dg-2018-05-15-1.csv | 8 | Guy Debelle[*]Deputy Governor | Opening Keynote at the CFO Forum Sydney – 15 May 2018 | However, the effect to date has not been that large in terms of the overall impact on bank funding costs. Thus far, it has not been a consequential development from a forecasting point of view. It is not clear how much of the rise in LIBOR is due to structural changes in money markets and how much is temporary. In the last couple of weeks, these money market rates have declined noticeably from their peaks. We will continue to monitor how this unfolds in the period ahead. |
9 | 0.36 | 0.3933333 | 10.451579 | 53.97105 | speech | Guy Debelle[*]Deputy Governor | 2018 | 0.40 | 0.676 | 367 | 53.97105 | Guy Debelle[*]Deputy Governor | G:/Research/JoanH/20181022_SMP Project/data-paragraphs/speeches/-dg-2018-05-15-1.csv | 9 | Guy Debelle[*]Deputy Governor | Opening Keynote at the CFO Forum Sydney – 15 May 2018 | The outlook for the economy in 2018 and 2019 is expected to be a little stronger than occurred in 2017. GDP growth is expected to pick up from around 2½ per cent currently to be around 3¼ over the next couple of years. |
11 | 0.38 | 0.5266667 | 11.040000 | 43.22167 | speech | Guy Debelle[*]Deputy Governor | 2018 | 0.47 | 0.400 | 368 | 43.22167 | Guy Debelle[*]Deputy Governor | G:/Research/JoanH/20181022_SMP Project/data-paragraphs/speeches/-dg-2018-05-15-1.csv | 11 | Guy Debelle[*]Deputy Governor | Opening Keynote at the CFO Forum Sydney – 15 May 2018 | One part of the answer is that the global conjuncture is constructive. We have been witnessing the most synchronised pick-up in the global economy for quite some time. The US economy is doing well. Europe is doing better than it has been for the past decade. The Japanese economy has recorded its longest period of quarterly growth in almost three decades. The global conjuncture has been reflected in a pick-up in global industrial production and trade that is particularly beneficial to the east Asian region, which is leveraged to the global cycle. |
## export results
# write.csv(speech_scores_clean, "speech_scores_clean.csv")
The session information for this program is shown below.
# Print the R version, platform, locale and the versions of attached and
# loaded packages used for this run.
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 17763)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_Australia.1252 LC_CTYPE=English_Australia.1252
## [3] LC_MONETARY=English_Australia.1252 LC_NUMERIC=C
## [5] LC_TIME=English_Australia.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] kableExtra_1.3.4 stringr_1.4.0 plyr_1.8.6
## [4] questionr_0.7.4 klaR_0.6-15 MASS_7.3-53
## [7] PRROC_1.3.1 randomForest_4.6-14 dplyr_1.0.6
## [10] tidyr_1.1.3 caret_6.0-86 ggplot2_3.3.3
## [13] lattice_0.20-41
##
## loaded via a namespace (and not attached):
## [1] httr_1.4.2 viridisLite_0.4.0 splines_4.0.3
## [4] foreach_1.5.1 prodlim_2019.11.13 shiny_1.6.0
## [7] assertthat_0.2.1 highr_0.9 stats4_4.0.3
## [10] yaml_2.2.1 ipred_0.9-11 pillar_1.6.0
## [13] glue_1.4.2 pROC_1.17.0.1 digest_0.6.27
## [16] promises_1.2.0.1 rvest_1.0.0 colorspace_2.0-1
## [19] recipes_0.1.16 htmltools_0.5.1.1 httpuv_1.6.1
## [22] Matrix_1.2-18 timeDate_3043.102 pkgconfig_2.0.3
## [25] labelled_2.8.0 haven_2.4.1 purrr_0.3.4
## [28] xtable_1.8-4 webshot_0.5.2 scales_1.1.1
## [31] svglite_2.0.0 later_1.2.0 gower_0.2.2
## [34] lava_1.6.9 tibble_3.1.1 combinat_0.0-8
## [37] generics_0.1.0 ellipsis_0.3.2 withr_2.4.2
## [40] nnet_7.3-14 survival_3.2-7 magrittr_2.0.1
## [43] crayon_1.4.1 mime_0.10 evaluate_0.14
## [46] fansi_0.4.2 nlme_3.1-149 xml2_1.3.2
## [49] forcats_0.5.1 class_7.3-17 tools_4.0.3
## [52] data.table_1.14.0 hms_1.0.0 lifecycle_1.0.0
## [55] munsell_0.5.0 compiler_4.0.3 systemfonts_1.0.1
## [58] rlang_0.4.11 grid_4.0.3 iterators_1.0.13
## [61] rstudioapi_0.13 miniUI_0.1.1.1 rmarkdown_2.8
## [64] gtable_0.3.0 ModelMetrics_1.2.2.2 codetools_0.2-16
## [67] DBI_1.1.1 reshape2_1.4.4 R6_2.5.0
## [70] lubridate_1.7.10 knitr_1.33 fastmap_1.1.0
## [73] utf8_1.2.1 stringi_1.5.3 Rcpp_1.0.6
## [76] vctrs_0.3.8 rpart_4.1-15 tidyselect_1.1.1
## [79] xfun_0.22