# Data_import_functions.R
###======================================================================================
# Resolve the stress-test start date and the projection dates.
#
# Args:
#   pmtrs:    parameter list; reads `ST_date_C` (start date as a string that
#             must appear in `date_key$string`) and `proj_window`.
#   date_key: data frame with columns `string`, `number` and `q_end`
#             (rows with q_end == 1 are the ones eligible as projection dates).
# Returns:
#   `pmtrs` with two elements added: `ST_date_N` (the `date_key$number` matching
#   `ST_date_C`) and `proj_dates` (the first `proj_window + 1` flagged date
#   numbers at or after `ST_date_N`).
# Raises:
#   An error when `ST_date_C` is missing or cannot be found in `date_key`;
#   previously this silently produced NA dates.
format_dates_F <- function(pmtrs, date_key) {
  start_idx <- match(pmtrs$ST_date_C, date_key$string)
  if (length(start_idx) == 0L || anyNA(start_idx)) {
    stop("ST_date_C (", paste(pmtrs$ST_date_C, collapse = ", "),
         ") not found in date_key$string", call. = FALSE)
  }
  pmtrs$ST_date_N <- date_key$number[start_idx]
  is_proj_date <- date_key$q_end == 1 & date_key$number >= pmtrs$ST_date_N
  pmtrs$proj_dates <- head(date_key$number[is_proj_date], pmtrs$proj_window + 1)
  pmtrs
}
###======================================================================================
###======================================================================================
###======================================================================================
# Top level function
# Import the raw SAFFI query results, map them to stress-test variables and
# return either the historical data or the stress-test starting values.
#
# Args:
#   date_key:              data frame with `string`, `number` and `q_end` columns.
#   SAFFI_queries:         data frame describing the queries; `queryid` is read
#                          here, other columns are used by the helpers called.
#   SAFFI_queries_complex: data frame whose `queryid` column flags the queries
#                          that need the "complex" pull.
#   variable_definitions:  data frame with at least `ST_item_code`,
#                          `ST_variable_name` and `ST_type`.
#   pmtrs:                 parameter list (`ST_date_N`, `hist_window`,
#                          `banks_list`, ... are read).
#   LVR_data_L:            not used in this function body.
#   SAFFI_update:          TRUE to pull from the database and refresh the CSV
#                          cache in ./SAFFI; FALSE to read the cached CSVs.
#   return_hist_data:      TRUE to return the per-bank historical data frames.
#   return_SAFFI_qery_item_mapping: TRUE to return only a query/bank/item
#                          mapping table built from the raw data.
# Returns:
#   * the query-item mapping data frame, if requested; otherwise
#   * a named list of per-bank historical data frames, if `return_hist_data`;
#   * otherwise the last historical row (excluding `period`) per bank, after
#     writing each bank's history to Output/Historical/<bank>.csv.
import_and_format_SAFFI_data_F <- function(date_key, SAFFI_queries, SAFFI_queries_complex, 
                                           variable_definitions,
                                           pmtrs, LVR_data_L,
                                           SAFFI_update, return_hist_data=FALSE,
                                           return_SAFFI_qery_item_mapping=FALSE) {
  
  ST_date <- pmtrs$ST_date_N
  # Date (as a date_key number) separating the old and new domestic-book forms;
  # consumed by date_formatting_etc_F.
  newDBforms_date <- date_key$number[match("2019-03-31", date_key$string)]
  hist_dates <- tail(
    date_key$number[date_key$q_end == 1 & date_key$number <= ST_date],
    pmtrs$hist_window)
  form_key <- variable_definitions[, c('ST_item_code', 'ST_variable_name', 'ST_type')]
  # Build cache paths with file.path() so they resolve on every platform
  # (a hard-coded backslash only acts as a directory separator on Windows).
  cache_file_F <- function(id) file.path("SAFFI", paste0(id, ".csv"))
  if (SAFFI_update) {
    SAFFI_raw <- lapply(SAFFI_queries$queryid, SAFFI_pull_F, date_key, 
                        SAFFI_queries_complex)
    names(SAFFI_raw) <- SAFFI_queries$queryid
    dir.create('SAFFI', showWarnings=FALSE)
    invisible(mapply(
      function(x1, x2) write.csv(x1, file=cache_file_F(x2), row.names=FALSE), 
      SAFFI_raw, names(SAFFI_raw)))
  } else {
    SAFFI_raw <- lapply(SAFFI_queries$queryid, 
                        function(i) read.csv(cache_file_F(i),
                                             header=TRUE, stringsAsFactors=FALSE))
    # The cache is only usable if it reaches the stress-test start date.
    if (tail(SAFFI_raw[[1]]$period, 1) < ST_date) {
      stop('SAFFI data need udpating to capture ST start date')
    }
    names(SAFFI_raw) <- SAFFI_queries$queryid
  }
  if (return_SAFFI_qery_item_mapping) {
    query_items_F <- function(i) {
      institutions <- unique(SAFFI_raw[[i]][, 'institution_code'])
      bank <- institutions[institutions %in% pmtrs$banks_list]
      items <- colnames(SAFFI_raw[[i]])[
        !colnames(SAFFI_raw[[i]]) %in% c('period', 'institution_code')]
      return(data.frame(query=names(SAFFI_raw)[i], bank, items))
    }
    query_item_mapping <- do.call(rbind, lapply(seq_along(SAFFI_raw), query_items_F))
    return(query_item_mapping)
  }
  SAFFI_fmt <- date_formatting_etc_F(SAFFI_raw, SAFFI_queries, newDBforms_date, 
                                     date_key, ST_date)
  SAFFI_by_bank <- lapply(pmtrs$banks_list, group_SAFFIdata_by_bank_F, 
                          SAFFI_fmt, SAFFI_queries)
  names(SAFFI_by_bank) <- pmtrs$banks_list
  hist_data_by_bank <- mapply(map_SAFFIdata_F, SAFFI_by_bank,
                              MoreArgs=list(variable_definitions, form_key),
                              SIMPLIFY=FALSE)
  hist_data_by_bank <- mapply(variable_mapping_F, 
                              hist_data_by_bank, names(hist_data_by_bank), 
                              MoreArgs=list(pmtrs, form_key), 
                              SIMPLIFY=FALSE)
  hist_data_by_bank <- lapply(hist_data_by_bank, 
                              function(x) x[x$period %in% hist_dates, ])
  # Give every bank the column set (and order) of the first bank.
  hist_data_by_bank <- lapply(hist_data_by_bank, 
                              function(x) x[names(hist_data_by_bank[[1]])])
  hist_data_by_bank <- fill_all_NA_pdlgd_F(hist_data_by_bank)
 
  if (return_hist_data) {
    return(hist_data_by_bank)
  } else {
    out_dir <- "Output/Historical/"
    dir.create('Output', showWarnings=FALSE)
    dir.create(out_dir, showWarnings=FALSE) 
    mapply(function(x, y) write.csv(x, paste0(out_dir, y, '.csv'), row.names=FALSE), 
           hist_data_by_bank, names(hist_data_by_bank))
    ST_start_values <- lapply(hist_data_by_bank, 
                              function(x) tail(x[, colnames(x) != 'period'], 1))
    ST_start_values <- ST_start_values[pmtrs$banks_list]
    return(ST_start_values)
  }
}
###======================================================================================
# Open an ODBC connection via odbc::dbConnect() and return it.
#
# NOTE: the values of every connection argument below (driver, server,
# database, timeout, Trusted_Connection, ApplicationIntent) were removed
# from this copy of the file, so the call cannot connect as written; they
# must be filled in before use.
#
# Args:
#   server, database: declared in the signature but not referenced in the
#                     visible body (the matching dbConnect arguments are blank).
# Returns:
#   The connection object returned by odbc::dbConnect().
connect_to_fido_F <- function(server,
                            database) {

  odbc_conn <- odbc::dbConnect(
    odbc::odbc(),
    driver = ,
    server = ,
    database = ,
    timeout = ,
    Trusted_Connection = ,
    ApplicationIntent = 
  )
  return(odbc_conn)
} # removed connection information
###======================================================================================
# Run one SQL statement against the database and return the result set.
#
# Args:
#   query_text: SQL text to execute.
#   ...:        accepted for call compatibility; not used.
# Returns:
#   Whatever odbc::dbGetQuery() returns for the query.
sql_query_reader_F <- function(query_text, ...) {
  fido_conn <- connect_to_fido_F()
  # A new connection is opened on every call, so close it when we leave
  # (also on error); otherwise each query leaks a connection.
  on.exit(odbc::dbDisconnect(fido_conn), add = TRUE)
  odbc::dbGetQuery(fido_conn, query_text)
}
###======================================================================================
# Pull one SAFFI query, dispatching to the complex or the simple reader.
#
# Args:
#   id:                    a single query id.
#   date_key:              date lookup table passed through to the reader.
#   SAFFI_queries_complex: data frame whose `queryid` column lists the ids
#                          that must go through SAFFI_pull_complex_F.
# Returns:
#   The data frame produced by the selected reader.
SAFFI_pull_F <- function(id, date_key, SAFFI_queries_complex){
  # Plain if/else: the choice is scalar control flow, not a vectorised
  # selection, so ifelse() with assignments in its arguments is not needed.
  if (id %in% SAFFI_queries_complex$queryid) {
    SAFFI_pull_complex_F(id, date_key)
  } else {
    SAFFI_pull_simple_F(id, date_key)
  }
}
##=======================================================================================
# Run the rendered SQL template for a "complex" query and reshape the result
# to one row per period/institution with one column per item.
#
# Args:
#   id:       query id; the SQL is read from SQL/rendered_templates/<id>.sql
#             (the template files are not shipped with this file).
#   date_key: data frame with `string` and `number` columns used to convert
#             the query's `period` values to date numbers.
# Returns:
#   A data frame keyed by `period` and `institution_code`; item columns are
#   named from the 5th and 6th result columns joined by "_", with hyphens,
#   spaces and commas stripped, and missing cells set to 0.
SAFFI_pull_complex_F <- function(id, date_key){
  sql_text <- readr::read_file((paste0('SQL/rendered_templates/', id, ".sql")))
  pulled <- as.data.frame(sql_query_reader_F(sql_text))
  pulled$period <- date_key$number[match(as.character(pulled$period), date_key$string)]
  # The item identifier is built from result columns 5 and 6 (by position).
  pulled$unique_id <- paste0(pulled[, 5], "_", pulled[, 6])
  key_cols <- c("period", "institution_code", "unique_id", "value")
  long_tbl <- melt(as.data.table(pulled[, key_cols]), id.vars = key_cols)
  wide_tbl <- as.data.frame(
    dcast(long_tbl, formula = period + institution_code ~ unique_id))
  # Strip hyphens, spaces and commas from the generated column names.
  names(wide_tbl) <- gsub("[- ,]", "", names(wide_tbl))
  wide_tbl[is.na(wide_tbl)] <- 0
  wide_tbl
}
###======================================================================================
# Run the rendered SQL template for a "simple" query and reshape the result
# to one row per period/institution with one column per item.
#
# Args:
#   id:       query id; the SQL is read from SQL/rendered_templates/<id>.sql.
#   date_key: data frame with `string` and `number` columns used to convert
#             the query's `period` values to date numbers.
# Returns:
#   A data frame keyed by `period` and `institution_code`; item columns are
#   named from the 1st and 2nd result columns joined by "_", and missing
#   cells are set to 0.
SAFFI_pull_simple_F <- function(id, date_key) {
  sql_text <- readr::read_file((paste0('SQL/rendered_templates/', id, ".sql")))
  pulled <- as.data.frame(sql_query_reader_F(sql_text))
  pulled$period <- date_key$number[match(as.character(pulled$period), date_key$string)]
  # The item identifier is built from result columns 1 and 2 (by position).
  pulled$unique_id <- paste0(pulled[, 1], "_", pulled[, 2])
  key_cols <- c("period", "institution_code", "unique_id", "value")
  long_tbl <- melt(as.data.table(pulled[key_cols]), id.vars = key_cols)
  wide_tbl <- as.data.frame(
    dcast(long_tbl, formula = period + institution_code ~ unique_id))
  wide_tbl[is.na(wide_tbl)] <- 0
  wide_tbl
}
###======================================================================================
# Clean each raw query result: blank out the periods a form does not cover,
# disambiguate the BSE00400 column, drop `institution_code`, and keep only
# quarter-end periods up to the stress-test start date.
#
# Args:
#   SAFFI_raw:       list of raw query data frames (each with `period` and
#                    `institution_code` columns), in the row order of
#                    `SAFFI_queries`.
#   SAFFI_queries:   data frame with a `querycons` column, one row per query.
#   newDBforms_date: date number from which the "DomesticNew*" forms apply
#                    and the "DomesticOld" form stops applying.
#   date_key:        data frame with `number` and `q_end` columns.
#   ST_date:         stress-test start date number (upper bound on `period`).
# Returns:
#   A list of cleaned data frames carrying the names of `SAFFI_raw`.
date_formatting_etc_F <- function(SAFFI_raw, SAFFI_queries, newDBforms_date, 
                                  date_key, ST_date) {
  quarter_ends <- date_key$number[date_key$q_end == 1]
  new_form_cons <- c("DomesticNewMthly", "DomesticNewQtrly",
                     "DomesticNewMthly720_1A")
  # Set whole rows to NA. Rows arrive as integer positions (from which()), so
  # an NA period cannot break the subscripted assignment.
  blank_rows_F <- function(data, rows) {
    if (length(rows) > 0) data[rows, ] <- NA
    data
  }
  single_query_F <- function(i) {
    data <- SAFFI_raw[[i]]
    cons <- SAFFI_queries$querycons[i]
    if (cons == "DomesticOld") {
      data <- blank_rows_F(data, which(data$period >= newDBforms_date))
      colnames(data)[colnames(data) == "BSE00400"] <- "BSE00400_DB"
    }
    if (cons %in% new_form_cons) {
      data <- blank_rows_F(data, which(data$period < newDBforms_date))
    }
    if (cons == "AccCons") {
      colnames(data)[colnames(data) == "BSE00400"] <- "BSE00400_cons"
    }
    # drop = FALSE keeps a data frame even when only `period` is left.
    data <- data[, colnames(data) != 'institution_code', drop = FALSE]
    keep <- data[, 'period'] %in% quarter_ends & data[, 'period'] <= ST_date
    data[keep, , drop = FALSE]
  }
  # seq_len() so that zero queries yields an empty list instead of indexing
  # elements 1 and 0.
  SAFFI_fmt <- lapply(seq_len(nrow(SAFFI_queries)), single_query_F)
  names(SAFFI_fmt) <- names(SAFFI_raw)
  SAFFI_fmt
}
###======================================================================================
# Merge all formatted query results belonging to one bank into a single
# data frame keyed by `period`.
#
# Args:
#   bank:          institution identifier to select.
#   SAFFI_fmt:     list of formatted query data frames, in the row order of
#                  `SAFFI_queries`.
#   SAFFI_queries: data frame with an `institution` column.
# Returns:
#   A data frame with one row per period (full outer join across the bank's
#   queries) plus a `bank` column. When a column name appears in more than
#   one query, the first occurrence is kept.
# Raises:
#   An error if no query belongs to `bank`.
group_SAFFIdata_by_bank_F <- function(bank, SAFFI_fmt, SAFFI_queries){
  bank_data_L <- SAFFI_fmt[SAFFI_queries$institution == bank]
  if (length(bank_data_L) == 0L) {
    stop("No SAFFI queries found for institution '", bank, "'", call. = FALSE)
  }
  bank_data <- bank_data_L[[1]]
  # seq_along(.)[-1] is empty for a single query, unlike 2:length(.) which
  # would count down to c(2, 1) and index a non-existent element.
  for (i in seq_along(bank_data_L)[-1]) {
    next_data <- bank_data_L[[i]]
    dup_cols <- colnames(next_data) %in% colnames(bank_data) & 
      colnames(next_data) != 'period'
    # drop = FALSE: keep a data frame even if only `period` survives.
    next_data <- next_data[, !dup_cols, drop = FALSE]
    bank_data <- base::merge(bank_data, next_data, by='period', 
                             all.x=TRUE, all.y=TRUE)
  }
  bank_data$bank <- bank  
  bank_data
}
###======================================================================================
# Aggregate a bank's raw item columns up to stress-test variables.
#
# Args:
#   SAFFI_bank:           data frame with a `period` column and one column per
#                         raw item code.
#   variable_definitions: not used in this function body.
#   form_key:             data frame mapping `ST_item_code` to
#                         `ST_variable_name`.
# Returns:
#   A data frame with `period` followed by one column per distinct
#   `ST_variable_name`, each the row-wise sum (NAs ignored) of the item
#   columns mapped to that variable.
map_SAFFIdata_F <- function(SAFFI_bank, variable_definitions, form_key) {
  item_names <- as.character(unique(form_key$ST_variable_name))
  source_cols <- colnames(SAFFI_bank)
  item_totals <- lapply(item_names, function(item) {
    codes <- form_key$ST_item_code[form_key$ST_variable_name == item]
    # cbind() turns a single selected column back into a matrix for rowSums.
    rowSums(cbind(SAFFI_bank[, source_cols %in% codes]), na.rm=TRUE)
  })
  mapped <- data.frame(SAFFI_bank$period, do.call(cbind, item_totals))
  colnames(mapped) <- c('period', item_names)
  mapped
}

###======================================================================================
# Prepare the macro scenario used in the projection.
#
# Steps: derive quarterly growth of `hp_index` and `cp_index`, fill missing
# values from row 3 onwards with hard-coded autoregressive equations, add
# year-ended GDP growth, keep rows from the stress-test start date, and pad
# to `proj_window + 1` rows by repeating the last row.
#
# Args:
#   macro:         data frame with columns `date`, `hp_index`, `cp_index`,
#                  `gdp_growth` and `un_rate`; the first three rows must be
#                  complete.
#   ST_start_data: not used in this function body.
#   pmtrs:         parameter list; reads `ST_date_N` and `proj_window`.
# Returns:
#   A data frame with columns hp_growth, un_rate, gdp_growth, cp_growth,
#   gdp_growth_ye and `quarter` (0, 1, 2, ...).
# Raises:
#   An error if the first three rows contain NA, or if no row has
#   `date >= ST_date_N`.
format_macro_inputs_F <- function(macro, ST_start_data, pmtrs) {
  ST_date <- pmtrs$ST_date_N
  # Checked before the growth columns are added, whose first element is NA
  # by construction.
  if(any(is.na(macro[1:3,]))){stop("not enough macro data supplied")}
  macro$hp_growth <- c(NA, 100*(tail(macro$hp_index, -1)/head(macro$hp_index, -1) - 1))
  macro$cp_growth <- c(NA, 100*(tail(macro$cp_index, -1)/head(macro$cp_index, -1) - 1))
  if(any(is.na(macro[3:nrow(macro), ]))){
    # Fill gaps recursively; order matters because later equations use the
    # values filled earlier in the same iteration.
    for (i in 3:nrow(macro)) {
      if(is.na(macro$gdp_growth[i])){macro$gdp_growth[i] <- 0.6 + 
        -0.177*(macro$gdp_growth[i - 1] - 0.6) + 
        0.036*(macro$gdp_growth[i - 2] - 0.6) + 
        0.141*(macro$un_rate[i-1] - 4.5)
      } 
      if(is.na(macro$un_rate[i])){macro$un_rate[i] <- 4.5 + 
        1.236 *(macro$un_rate[i-1] - 4.5)  - 0.234*(macro$un_rate[i-2] - 4.5) -
        0.036*(macro$gdp_growth[i] - 0.6) - 0.078*(macro$gdp_growth[i-1] - 0.6) -
        0.097*(macro$gdp_growth[i-2] - 0.6)
      } 
      if(is.na(macro$hp_growth[i])){macro$hp_growth[i] <- 0.322 +
        0.632*(macro$hp_growth[i-1]) + 0.212*(macro$gdp_growth[i])
      }
      if(is.na(macro$cp_growth[i])){macro$cp_growth[i] <- -0.05 + 
        0.769*(macro$cp_growth[i-1]) + 0.155*(macro$gdp_growth[i]) + 
        0.203*(macro$hp_growth[i])
      }
    }
  }
  # Year-ended growth (%): compound of the current and previous three
  # quarterly growth factors. The first three rows are NA, and a series with
  # fewer than four rows is all NA (the previous loop errored in that case).
  format_year_ended_growth_F <- function(macro){
    q_factor <- (macro$gdp_growth/100) + 1
    n_obs <- length(q_factor)
    ye <- rep(NA_real_, n_obs)
    first <- seq_len(max(n_obs - 3, 0))
    ye[first + 3] <- (q_factor[first] * 
                        q_factor[first + 1] * 
                        q_factor[first + 2] *
                        q_factor[first + 3]) - 1
    ye * 100
  }
  macro$gdp_growth_ye <- format_year_ended_growth_F(macro)
  macro <- macro[macro$date >= ST_date, ]
  if (nrow(macro) == 0) {
    # Previously this fell through and returned a single all-NA row.
    stop("no macro data on or after the stress test start date", call. = FALSE)
  }
  macro <- macro[, c('hp_growth', 'un_rate', 'gdp_growth', 'cp_growth', 'gdp_growth_ye')]
  extra_obs_needed <- (pmtrs$proj_window + 1) - nrow(macro)
  if(extra_obs_needed > 0) {
    # Hold the last observation constant for the remaining quarters.
    macro <- macro[c(seq_len(nrow(macro)), rep(nrow(macro), extra_obs_needed)), ]
  }
  macro$quarter <- seq_len(nrow(macro)) - 1L
  return(macro)
}
###======================================================================================
# Fill missing PD/LGD-type columns in a list of per-bank data frames, in
# three passes of decreasing specificity.
#
# Args:
#   alldata: list of data frames that share the columns of the first element
#            (NOTE(review): equal row counts across elements appear to be
#            assumed by the row-wise means below - confirm against caller).
# Returns:
#   The same list with NA values in the matched columns replaced where a
#   fill value could be computed.
fill_all_NA_pdlgd_F <- function(alldata) {
  # Columns whose name contains 'PD_' or 'LGD_' (taken from the first element).
  vars <- grep('PD_|LGD_', colnames(alldata[[1]]), value=TRUE)
  # Pass 1: within one data frame, a column that is only partly NA has its
  # NAs replaced by that column's own mean.
  some_NA_F <- function(x, vars) {
    some_NA <- apply(x[, vars], 2, function(x1) any(is.na(x1)) & !all(is.na(x1)))
    for(v in vars[some_NA]) x[is.na(x[, v]), v] <- mean(x[, v], na.rm=TRUE)
    return(x)
  }
  alldata <- lapply(alldata, some_NA_F, vars)
  # Row-wise mean of one variable across all data frames; NA for a row where
  # every data frame is NA.
  onevar_means_F <- function(v, alldata) {
    vardata <- sapply(alldata, function(x) x[, v]) 
    varmeans <- ifelse(apply(vardata, 1, function(x) all(is.na(x))), 
                       NA, rowMeans(vardata, na.rm=TRUE))  
    return(varmeans)
  }
  varmeans <- sapply(vars, onevar_means_F, alldata)
  # Pass 2: a column whose first row is NA is overwritten with the
  # cross-data-frame mean of the same variable.
  all_NA_F <- function(x, vars, varmeans) {
    replace_vars <- vars[is.na(x[1, vars])] 
    x[, replace_vars] <- varmeans[, replace_vars]
    return(x)
  }
  alldata <- lapply(alldata, all_NA_F, vars, varmeans)
  # Row-wise mean over every column starting with `pref`, pooled across all
  # data frames.
  all_means_F <- function(pref, alldata) {
    mean_data <- lapply(alldata, function(x) x[, grepl(paste0('^', pref), names(x))])
    do.call(cbind, mean_data) %>% rowMeans(na.rm=TRUE)
  }
  # Pass 3: columns starting with `pref` whose first row is still NA are
  # overwritten with the pooled mean for that prefix.
  apply_all_means_F <- function(x, pref, alldata) {
    replace_vars <- grepl(paste0('^', pref), colnames(x)) & is.na(x[1, ])
    x[, replace_vars] <- all_means_F(pref, alldata)
    return(x)
  }
  # Each call passes the list as it stood before that call, so pooled means
  # within one prefix do not depend on the order of the data frames.
  alldata <- lapply(alldata, apply_all_means_F, pref='PD', alldata)
  alldata <- lapply(alldata, apply_all_means_F, pref='LGD', alldata)
  alldata <- lapply(alldata, apply_all_means_F, pref='minPD', alldata)
  alldata <- lapply(alldata, apply_all_means_F, pref='minLGD', alldata)
  return(alldata)
}
###======================================================================================
# Build the model-variable data frame for one bank from its aggregated
# form data.
#
# Args:
#   x:        data frame of aggregated form variables for one bank, with a
#             `period` column.
#   names_x:  the bank's identifier; banks listed in `pmtrs$banks_level1`
#             take the Lvl1Cons_bsh_variables_F path.
#   pmtrs:    parameter list; `banks_level1` and `cash_rate_start` are read
#             here, and it is passed on to the construction helpers.
#   form_key: data frame with `ST_variable_name` and `ST_type`, used to find
#             variables that are copied through unchanged.
# Returns:
#   A data frame with one row per period containing asset classes (AC_*),
#   provisions (PR_*), capital, liquidity, P&L and firesale variables, the
#   pass-through variables, and the PD/LGD columns added by
#   contruct_loan_PDs_LGDs_F.
variable_mapping_F <- function(x, names_x, pmtrs, form_key) {
  cash_rate_start <- pmtrs$cash_rate_start
  
  x <- as.data.frame(x)
  data <- data.frame(period=x$period)
  if(!names_x %in% pmtrs$banks_level1) {
    data <- construct_loan_ACs_and_prvns_F(x, data, pmtrs)
    data <- construct_other_ACs_F(x, data)
    
    data$bsh_total_equity <- x$bsh_total_equity
    data$total_prvn_from_forms <- x$bsh_tot_loans_colprov + x$bsh_tot_loans_indprov
    
  } else {
    data <- Lvl1Cons_bsh_variables_F(x, pmtrs)
  }
  
  # Balance-sheet totals.
  data$bsh_total_AC <- 
    rowSums(data[, grepl('^AC_ln_|^AC_sec_|^AC_oth_', colnames(data))])
  data$bsh_total_check_322 <- x$bsh_total_assets_cons
  
  data$cash_rate <- cash_rate_start
  
  data$bsh_prpn_fund_whlsale <- 
    1 - (x$bsh_dep_liab_dom_total - x$bsh_liab_dep_adis + x$bsh_dep_liab_intragroup) / 
    x$bsh_total_liabilities
  
  data$bsh_growth <- 0
 
  # Capital ratios and average risk weights.
  data$cap_ratio_cet1 <- x$cap_cet1 / x$rwa_total
  data$cap_ratio_tier1 <- x$cap_tier1 / x$rwa_total
  data$cap_ratio_total <- (x$cap_tier1 + x$cap_tier2) / x$rwa_total
  data$rwa_avg_RW_noncash <- x$rwa_total/(data$bsh_total_AC - data$AC_oth_cash)
  data$rwa_avg_RW <- x$rwa_total/data$bsh_total_AC
  data$rwa_lag_avg_RW <- c(NA, head(data$rwa_avg_RW, -1))
  
  # Median absolute quarter-on-quarter change of the (positive) CET1 ratios.
  cet1_ratio_dif <- diff(data$cap_ratio_cet1[data$cap_ratio_cet1 > 0])
  data$cap_devn_cet1_ratio <- median(abs(cet1_ratio_dif), na.rm=TRUE)

  # Liquidity: run-off rates are weighted amount over balance.
  data$liq_AmountRBAEligibleALA <- x$liq_AmountRBAEligibleALA - x$liq_AmountEligibleCLF
  data$liq_AmountAdditionalRepo <- 0
  data$liq_runOffRateLCRDeposits <- x$liq_WeightedAmountDeposits / x$liq_BalanceDeposits
  data$liq_runOffRateLCRWholesaleFunding <-
    x$liq_WeightedAmountWholesaleFunding / x$liq_BalanceWholesaleFunding
  data$liq_runOffRateLCRObligationsFacilities <-
    x$liq_WeightedAmountObligationsFacilities / x$liq_BalanceObligationsFacilities
  data$liq_runOffRateLCROtherOutflows <-
    x$liq_WeightedAmountOtherOutflows / x$liq_BalanceOtherOutflows
  data$liq_runOffRateLCRTotalInflows <-
    x$liq_WeightedAmountTotalInflows / x$liq_BalanceTotalInflows
  data$liq_usedForeignCentralBankBalances <- 0
  data$liq_usedOtherHQLA1Securities <- 0
  data$liq_usedAusGovBonds <- 0
  data$liq_usedSemiGovBonds <- 0
  data$liq_repoedAmountRBAEligibleALA <- 0
  data$liq_repoedAmountEligibleCLF <- 0
  data$liq_additionalRepoRequirement <- 0
  
  # P&L rates: flow of the current period (times 4) over the previous
  # period's stock, hence NA in the first row.
  data$pnl_avg_fund_rate <- 
    c(NA, tail(x$pnl_int_exp*4, -1)/head(data$bsh_total_AC - data$bsh_total_equity, -1))
  
  data$pnl_avg_lend_rate <- c(NA, tail(x$pnl_int_inc*4, -1)/head(data$bsh_total_AC, -1))
  
  data$pnl_fund_spr_contagion <- 0
  
  data$cap_ratio_cet1_lag <- c(NA, head(data$cap_ratio_cet1, -1))
  
  data$pnl_net_int_inc <- x$pnl_int_inc - x$pnl_int_exp
  data$pnl_net_int_margin <- 
    c(NA, tail(x$pnl_int_inc - x$pnl_int_exp, -1)*4/head(data$bsh_total_AC, -1))
  
  data$pnl_other_inc_hist <- x$pnl_other_inc
  data$pnl_op_exp_hist <- x$pnl_op_exp
  
  # Constant columns: mean of the last four observations.
  data$pnl_other_inc <- mean(tail(x$pnl_other_inc, 4))
  data$pnl_op_exp <- mean(tail(x$pnl_op_exp, 4))
  data$pnl_other_inc_contemp <- x$pnl_other_inc
  data$pnl_op_exp_contemp <- x$pnl_op_exp
  
  data$pnl_firesale_loss <- 0
  
  data$firesale_run_liabilities <- 0
  data$firesale_lost_cash <- 0
  data$firesale_lost_AGS <- 0
  data$firesale_lost_semis <- 0
  data$firesale_lost_othsec <- 0
  
  # drop = FALSE keeps the column name when exactly one column matches;
  # without it cbind() receives a bare vector and invents a name.
  mort_cols <- grep('^bsh_outs_res_mort_|^bsh_new_res_mort_', colnames(x))
  data <- cbind(data, x[, mort_cols, drop = FALSE])
  
  # Variables defined in the form key that have not been produced above are
  # copied through from x unchanged.
  untouched_variables <-
    form_key$ST_variable_name[form_key$ST_type != 'not_currently_in_output' &
                                form_key$ST_variable_name != '' &
                                !form_key$ST_variable_name %in% colnames(data)] %>%
    unique()

  data <- cbind(data, x[, untouched_variables, drop = FALSE])
  
  data <- contruct_loan_PDs_LGDs_F(x, data)

  return(data)
}
###======================================================================================
# This function is used in variable_mapping_F
# Build loan asset classes (AC_ln_*) and their provisions (PR_*) from the
# form variables in `x`, appending them to `data`.
#
# For each class, PR_* is the sum of the class's `_indprov` and `_colprov`
# items (presumably individual and collective provisions) and AC_ln_* is the
# class total less PR_*. Where the forms give no provision for a sub-class,
# one is estimated pro rata from a related class, floored at the reported
# `_indprov` amount. Statement order matters: several columns are
# reassigned after first being used as inputs to other columns.
#
# Args:
#   x:     data frame of form variables (bsh_*, off_bsh_*), one row per period.
#   data:  data frame to extend, with the same number of rows as `x`.
#   pmtrs: parameter list; reads `credit_line_drawdown`,
#          `credit_line_drawdown_rate` and `hist_window`.
# Returns:
#   `data` with PR_*, AC_ln_*, minPR_*, business_credit_* and
#   `bsh_provisions_total` columns added. Emits a warning if the classes do
#   not reconcile to `x$bsh_tot_loans_total`.
construct_loan_ACs_and_prvns_F <- function(x, data, pmtrs) {
 
  # Domestic residential mortgages.
  data$PR_dom_res_mort <- x$bsh_dom_res_mort_indprov + x$bsh_dom_res_mort_colprov
  data$AC_ln_dom_res_mort <- x$bsh_dom_res_mort_tot - 
    data$PR_dom_res_mort
 
  # Other residential mortgages: total (variable + fixed rate) mortgages less
  # the domestic part, floored at zero.
  data$PR_os_res_mort <- 
    pmax(0, x$bsh_tot_varrate_mort_indprov + x$bsh_tot_varrate_mort_colprov +
           x$bsh_tot_fixrate_mort_indprov + x$bsh_tot_fixrate_mort_colprov -
           (x$bsh_dom_res_mort_indprov + x$bsh_dom_res_mort_colprov))
  data$AC_ln_os_res_mort <- 
    pmax(0, x$bsh_tot_varrate_mort_total + 
           x$bsh_tot_fixrate_mort_total - x$bsh_dom_res_mort_tot - 
           data$PR_os_res_mort)
 
  # Credit cards.
  data$PR_creditcards <- x$bsh_tot_creditcards_indprov + 
    x$bsh_tot_creditcards_colprov
  data$AC_ln_creditcards <- x$bsh_tot_creditcards_total - 
    data$PR_creditcards
 
  # Estimate provisions on domestic credit cards using the total credit-card
  # provision ratio (0 when there are no cards), floored at the reported
  # individual provision.
  cc_prvn_ratio <- (x$bsh_tot_creditcards_indprov + x$bsh_tot_creditcards_colprov) /
    x$bsh_tot_creditcards_total
  cc_prvn_ratio <- ifelse(x$bsh_tot_creditcards_total == 0, 0, cc_prvn_ratio)
  cc_dom_prvns <- pmax(cc_prvn_ratio * x$bsh_dom_personalcredit_CCs,
                       x$bsh_dom_personalcredit_CCs_indprov)
  
  # Other domestic personal credit: personal credit excluding cards.
  data$PR_dom_oth_personal <- x$bsh_dom_personalcredit_indprov + 
    x$bsh_dom_personalcredit_colprov - cc_dom_prvns
  data$AC_ln_dom_oth_personal <- x$bsh_dom_personalcredit_total - 
    x$bsh_dom_personalcredit_CCs - 
    data$PR_dom_oth_personal
  
  # Sovereign: general government loans plus the state/commonwealth
  # government part of non-financial business and the RBA part of FIs.
  # Provisions on the government part of non-financial business are
  # estimated with the general-government provision ratio.
  sov_prvn_ratio <- 
    (x$bsh_dom_govt_generalloans_indprov + x$bsh_dom_govt_generalloans_colprov) / 
    x$bsh_dom_govt_generalloans_total
  sov_prvn_ratio <- ifelse(x$bsh_dom_govt_generalloans_total == 0, 0, sov_prvn_ratio)
  sov_nonfin_prvns <- 
    pmax(sov_prvn_ratio *(x$bsh_dom_nonfinbus_sttgovt + x$bsh_dom_nonfinbus_comgovt),
         x$bsh_dom_nonfinbus_sttgovt_indprov + x$bsh_dom_nonfinbus_comgovt_indprov)
  
  data$PR_dom_sov <- sov_nonfin_prvns + x$bsh_dom_govt_generalloans_indprov + 
    x$bsh_dom_govt_generalloans_colprov 
  data$AC_ln_dom_sov <- x$bsh_dom_govt_generalloans_total + 
    x$bsh_dom_nonfinbus_sttgovt + x$bsh_dom_nonfinbus_comgovt + x$bsh_dom_FIs_RBA - 
    data$PR_dom_sov
  
  # Domestic business: non-financial business plus community organisations,
  # excluding the government part moved to sovereign above. At this point it
  # still includes the `cp` loans, which are split out further down.
  data$PR_dom_business <- x$bsh_dom_nonfinbus_tot_indprov + 
    x$bsh_dom_nonfinbus_tot_colprov - sov_nonfin_prvns +
    x$bsh_dom_community_orgs_indprov + x$bsh_dom_community_orgs_colprov
  data$AC_ln_dom_business <- x$bsh_dom_nonfinbus_tot + x$bsh_dom_community_orgs_total -
    x$bsh_dom_nonfinbus_sttgovt - x$bsh_dom_nonfinbus_comgovt - 
    data$PR_dom_business
  
  # Domestic financial corporations, excluding the RBA.
  data$PR_dom_fincorp <- x$bsh_dom_FIs_tot_indprov + x$bsh_dom_FIs_tot_colprov
  data$AC_ln_dom_fincorp <- x$bsh_dom_FIs_tot_total - x$bsh_dom_FIs_RBA - 
    data$PR_dom_fincorp
 
  # Domestic `cp` loans: provisions estimated with the business provision
  # ratio, floored at the reported individual provision.
  data$PR_dom_cp <- 
    pmax(x$bsh_dom_cp_loans_tot * (data$PR_dom_business / data$AC_ln_dom_business), 
         x$bsh_dom_cp_loans_indprov)
  
  # Where business loans are zero the ratio is undefined, so fall back to the
  # reported individual provision.
  data$PR_dom_cp[data$AC_ln_dom_business == 0] <- 
    x$bsh_dom_cp_loans_indprov[data$AC_ln_dom_business == 0]
  data$AC_ln_dom_cp <- x$bsh_dom_cp_loans_tot + x$bsh_dom_cp_own_carryvalue -
    data$PR_dom_cp
  
  # Remove the `cp` part from domestic business.
  data$PR_dom_business <- data$PR_dom_business - data$PR_dom_cp
  data$AC_ln_dom_business <- data$AC_ln_dom_business - data$AC_ln_dom_cp
  
  # NOTE(review): unlike PR_dom_cp above, there is no fallback here for
  # AC_ln_dom_business == 0, and the ratio uses the business columns after
  # the `cp` split - confirm both are intended.
  data$PR_os_cp <- 
    pmax(x$bsh_os_cp_loans_tot * (data$PR_dom_business / data$AC_ln_dom_business), 
         x$bsh_os_cp_loans_indprov)
  data$AC_ln_os_cp <- x$bsh_os_cp_loans_tot + x$bsh_os_cp_own_carryvalue - 
    data$PR_os_cp
  
  # Residual class: total loans and total provisions less everything
  # allocated above.
  data$PR_os_oth_loans <- x$bsh_tot_loans_indprov + x$bsh_tot_loans_colprov -
    (data$PR_dom_res_mort + data$PR_os_res_mort + data$PR_creditcards + 
       data$PR_dom_oth_personal + data$PR_dom_sov + data$PR_dom_business + 
       data$PR_dom_fincorp + data$PR_dom_cp + data$PR_os_cp)
  data$AC_ln_os_oth_loans <- x$bsh_tot_loans_total -
    (data$AC_ln_dom_res_mort + data$AC_ln_os_res_mort + data$AC_ln_creditcards + 
       data$AC_ln_dom_oth_personal + data$AC_ln_dom_sov + data$AC_ln_dom_business + 
       data$AC_ln_dom_fincorp + data$AC_ln_dom_cp + data$AC_ln_os_cp) -
    data$PR_os_oth_loans - 
    (data$PR_dom_res_mort + data$PR_os_res_mort + data$PR_creditcards + 
       data$PR_dom_oth_personal + data$PR_dom_sov + data$PR_dom_business + 
       data$PR_dom_fincorp + data$PR_dom_cp + data$PR_os_cp)
 
  # A negative residual is absorbed into domestic residential mortgages
  # (balance and provisions), and the residual class is then floored at zero.
  # The three ifelse() tests must run before AC_ln_os_oth_loans is floored.
  data$AC_ln_dom_res_mort <- 
    ifelse(data$AC_ln_os_oth_loans < 0, 
           data$AC_ln_dom_res_mort + data$AC_ln_os_oth_loans, 
           data$AC_ln_dom_res_mort)
  data$PR_dom_res_mort <- 
    ifelse(data$AC_ln_os_oth_loans < 0, 
           data$PR_dom_res_mort + data$PR_os_oth_loans, data$PR_dom_res_mort)
  data$PR_os_oth_loans <- 
    ifelse(data$AC_ln_os_oth_loans < 0, 0, data$PR_os_oth_loans)
  data$AC_ln_os_oth_loans <- pmax(0, data$AC_ln_os_oth_loans)
 
  # Off-balance-sheet credit lines: total limits, undrawn amount, and the
  # assumed drawdown (zero unless pmtrs$credit_line_drawdown is TRUE).
  data$business_credit_limits <- (x$off_bsh_credit_line_cards + 
                                    x$off_bsh_credit_line_margin + 
                                    x$off_bsh_credit_line_oth) 
  data$business_credit_limits_available <- data$business_credit_limits - 
    (x$off_bsh_credit_line_cards_used + x$off_bsh_credit_line_margin_used + 
       x$off_bsh_credit_line_oth_used)
  if(pmtrs$credit_line_drawdown){
    data$business_credit_drawdowns <- data$business_credit_limits_available *
      pmtrs$credit_line_drawdown_rate
  } else{
    data$business_credit_drawdowns <- data$business_credit_limits_available * 0
  }
  
  # Drawdowns are added to domestic business loans.
  data$AC_ln_dom_business <- data$AC_ln_dom_business + data$business_credit_drawdowns 
 
  # minPR_*: minimum of each provision over the last `hist_window` rows, or
  # over the rows after the last zero mortgage provision if the window
  # contains a non-positive value.
  # NOTE(review): if the window holds a negative value but no exact zero,
  # which() is empty and max() returns -Inf; if nrow(data) < hist_window the
  # start index is non-positive - confirm neither can occur.
  hist_sample <- (nrow(data) - pmtrs$hist_window + 1):nrow(data)
  if(any(data$PR_dom_res_mort[hist_sample] <= 0)) {
    hist_sample <- (max(which(data$PR_dom_res_mort == 0)) + 1):nrow(data)
  }       
  for(name in grep('^PR_', colnames(data), value=TRUE)) {
    data[, paste0('min', name)] <- min(data[hist_sample, name])
  }
  
  # Reconciliation diagnostics: each entry is a reported total minus the sum
  # of the classes built above (drawdowns are backed out of business loans).
  # Only `total_loans` is checked below; `mort_total` and
  # `dom_nonhouseholds` are computed but not used.
  test <- c()
 
  test$mort_total <- (x$bsh_tot_varrate_mort_total + x$bsh_tot_fixrate_mort_total) - 
    (data$AC_ln_dom_res_mort + data$PR_dom_res_mort + 
       data$AC_ln_os_res_mort + data$PR_os_res_mort)
  test$dom_nonhouseholds <- 
    (x$bsh_dom_community_orgs_total + x$bsh_dom_nonfinbus_tot + 
       x$bsh_dom_govt_generalloans_total + x$bsh_dom_FIs_tot_total) - 
  
    (data$AC_ln_dom_business + - data$business_credit_drawdowns + data$PR_dom_business +
       data$AC_ln_dom_cp + data$PR_dom_cp +
       data$AC_ln_dom_sov + data$PR_dom_sov + 
       data$AC_ln_dom_fincorp + data$PR_dom_fincorp)
  test <- data.frame(mort_total=test$mort_total, 
                     dom_nonhouseholds=test$dom_nonhouseholds)
  test$total_loans <- x$bsh_tot_loans_total - 
    (data$AC_ln_dom_res_mort + data$PR_dom_res_mort +
       data$AC_ln_os_res_mort + data$PR_os_res_mort + 
       data$AC_ln_creditcards + data$PR_creditcards + 
       data$AC_ln_dom_oth_personal + data$PR_dom_oth_personal + 
       data$AC_ln_dom_sov + data$PR_dom_sov + 
       data$AC_ln_dom_business  - data$business_credit_drawdowns + 
       data$PR_dom_business + data$AC_ln_dom_fincorp + 
       data$PR_dom_fincorp + data$AC_ln_dom_cp + data$PR_dom_cp + 
       data$AC_ln_os_cp + data$PR_os_cp +
       data$AC_ln_os_oth_loans + data$PR_os_oth_loans)
  if(sum(abs(test$total_loans), na.rm=TRUE) > 1) warning('Asset classes not summing to proper totals')
  # Matches every column starting with 'PR_' (the minPR_* columns do not).
  data$bsh_provisions_total <- rowSums(data[, grepl('^PR_', colnames(data))])
  return(data)
}
###======================================================================================
# This function is used in variable_mapping_F
# Build the non-loan asset classes (AC_oth_* and AC_sec_*) from the form
# variables in `x` and append them to `data`.
#
# Args:
#   x:    data frame of form variables (bsh_*), one row per period.
#   data: data frame to extend, with the same number of rows as `x`.
# Returns:
#   `data` with the AC_oth_* and AC_sec_* columns added, in the order they
#   are assigned below.
construct_other_ACs_F <- function(x, data) {
  # Intermediate sums shared by more than one output column.
  fincorp_deposits <- x$bsh_dom_deposits_ADIs + 
    x$bsh_dom_deposits_RFCs + x$bsh_dom_deposits_othFIs
  cash_like_items <- x$bsh_notes_coins + x$bsh_deposits_atcall + x$bsh_gold +
    x$bsh_due_from_CHs + x$bsh_repo_collateral + x$bsh_due_from_FIs_tot +
    x$bsh_oth_deposits_ADIs + x$bsh_oth_deposits_other
  fixed_and_other <- x$bsh_tot_net_intangible_assets + 
    x$bsh_tot_other_assets + x$bsh_tot_life_ins_investments + 
    x$bsh_tot_other_investments + x$bsh_tot_net_fixed_assets
  adi_sec_all <- x$bsh_sec_hft_ADI + x$bsh_sec_nft_ADI
  # Domestic ADI securities cannot exceed all ADI securities held.
  adi_sec_dom <- pmin(x$bsh_dom_sec_ST_ADIs + x$bsh_dom_sec_LT_ADIs, adi_sec_all)

  # Other assets; cash excludes deposits with domestic financial corporations.
  data$AC_oth_acceptances <- x$bsh_acceptances
  data$AC_oth_dom_deposits_fincorp <- fincorp_deposits
  data$AC_oth_cash <- cash_like_items - fincorp_deposits
  data$AC_oth_fixed_n_other_assets <- fixed_and_other - x$bsh_total_deferred_inc
  data$AC_oth_intragroup_assets <- 0

  # Securities: each class sums its `hft` and `nft` holdings.
  data$AC_sec_ABS <- x$bsh_sec_hft_ABS + x$bsh_sec_nft_ABS
  data$AC_sec_corp <- x$bsh_sec_hft_corp_paper + x$bsh_sec_nft_corp_paper
  data$AC_sec_dom_ADI <- adi_sec_dom
  data$AC_sec_os_ADI <- pmax(0, adi_sec_all - adi_sec_dom)
  data$AC_sec_oth_debt <- x$bsh_sec_hft_oth_debt + x$bsh_sec_nft_oth_debt
  data$AC_sec_equity <- x$bsh_sec_hft_equities + x$bsh_sec_nft_oth_equities +
    x$bsh_sec_nft_ADI_equities + x$bsh_sec_nft_ins_equities
  data$AC_sec_AGS <- x$bsh_sec_hft_AGS + x$bsh_sec_nft_AGS
  data$AC_sec_semis <- x$bsh_sec_hft_semis + x$bsh_sec_nft_semis
  data$AC_sec_othgov <- x$bsh_sec_hft_frngovt + x$bsh_sec_nft_frngovt
  data
}
###======================================================================================
# This function is used in variable_mapping_F
# Build the balance-sheet variables for banks listed in pmtrs$banks_level1
# (the alternative to construct_loan_ACs_and_prvns_F plus
# construct_other_ACs_F in variable_mapping_F), using the domestic-book
# (bsh_dom_* / bsh_db_*) form variables.
#
# Args:
#   x:     data frame of form variables, one row per period, with `period`.
#   pmtrs: parameter list; reads `credit_line_drawdown` and
#          `credit_line_drawdown_rate`.
# Returns:
#   A data frame with `period`, PR_*, AC_ln_*, minPR_*, lagged mortgage
#   totals, AC_oth_*, AC_sec_*, business_credit_*, `bsh_total_equity` and
#   `total_prvn_from_forms`. Emits a warning if the loan classes do not
#   reconcile to `x$bsh_total_loans_db`.
Lvl1Cons_bsh_variables_F <- function(x, pmtrs) {
  x <- as.data.frame(x)
  data <- data.frame(period=x$period)

  # Loan asset classes and provisions. PR_* sums the class's `_indprov` and
  # `_colprov` items; AC_ln_* is the class total less PR_*.
  construct_loan_ACs_and_prvns_ING_F <- function(x, data, pmtrs) {
   
    data$PR_dom_res_mort <- x$bsh_dom_res_mort_indprov + x$bsh_dom_res_mort_colprov
    data$AC_ln_dom_res_mort <- x$bsh_dom_res_mort_tot - 
      data$PR_dom_res_mort
    
    data$PR_os_res_mort <- 0
    data$AC_ln_os_res_mort <- 0
    
    data$PR_creditcards <- x$bsh_dom_personalcredit_CCs_indprov 
    data$AC_ln_creditcards <- x$bsh_dom_personalcredit_CCs - 
      data$PR_creditcards
  
    data$PR_dom_oth_personal <- x$bsh_dom_personalcredit_indprov + 
      x$bsh_dom_personalcredit_colprov - x$bsh_dom_personalcredit_CCs_indprov
    data$AC_ln_dom_oth_personal <- x$bsh_dom_personalcredit_total - 
      x$bsh_dom_personalcredit_CCs - 
      data$PR_dom_oth_personal
   
    # Provisions on the government part of non-financial business are
    # estimated with the general-government provision ratio (0 when there
    # are no general government loans), floored at the reported amount.
    sov_prvn_ratio <- 
      (x$bsh_dom_govt_generalloans_indprov + x$bsh_dom_govt_generalloans_colprov) / 
      x$bsh_dom_govt_generalloans_total
    sov_prvn_ratio <- ifelse(x$bsh_dom_govt_generalloans_total == 0, 0, sov_prvn_ratio)
    sov_nonfin_prvns <- 
      pmax(sov_prvn_ratio * (x$bsh_dom_nonfinbus_sttgovt + x$bsh_dom_nonfinbus_comgovt),
           x$bsh_dom_nonfinbus_sttgovt_indprov + x$bsh_dom_nonfinbus_comgovt_indprov)
    
    data$PR_dom_sov <- sov_nonfin_prvns + x$bsh_dom_govt_generalloans_indprov + 
      x$bsh_dom_govt_generalloans_colprov 
    data$AC_ln_dom_sov <- x$bsh_dom_govt_generalloans_total + 
      x$bsh_dom_nonfinbus_sttgovt + x$bsh_dom_nonfinbus_comgovt + x$bsh_dom_FIs_RBA - 
      data$PR_dom_sov
    
    data$PR_dom_business <- x$bsh_dom_nonfinbus_tot_indprov + 
      x$bsh_dom_nonfinbus_tot_colprov - sov_nonfin_prvns +
      x$bsh_dom_community_orgs_indprov + x$bsh_dom_community_orgs_colprov
    data$AC_ln_dom_business <- x$bsh_dom_nonfinbus_tot + 
      x$bsh_dom_community_orgs_total -
      x$bsh_dom_nonfinbus_sttgovt - x$bsh_dom_nonfinbus_comgovt - 
      data$PR_dom_business
   
    data$PR_dom_fincorp <- x$bsh_dom_FIs_tot_indprov + x$bsh_dom_FIs_tot_colprov
    data$AC_ln_dom_fincorp <- x$bsh_dom_FIs_tot_total - x$bsh_dom_FIs_RBA - 
      data$PR_dom_fincorp

    data$PR_dom_cp <- 0
    data$AC_ln_dom_cp <- 0
   
    data$PR_os_cp <- 0
    data$AC_ln_os_cp <- 0
  
    data$PR_os_oth_loans <- x$bsh_dom_nonres_indprov + x$bsh_dom_nonres_colprov
    data$AC_ln_os_oth_loans <- x$bsh_dom_nonres_total -
      data$PR_os_oth_loans
    data$bsh_provisions_total <- rowSums(data[, grepl('^PR_', colnames(data))])
    return(data)
  }
  data <- construct_loan_ACs_and_prvns_ING_F(x, data, pmtrs)
  # minPR_*: minimum of each provision over the whole sample, or over the
  # rows after the last zero mortgage provision if any value is not positive.
  # NOTE(review): a negative value with no exact zero leaves which() empty -
  # confirm this cannot occur.
  if(all(data$PR_dom_res_mort > 0)) {
    hist_sample <- seq_len(nrow(data))
  } else {
    hist_sample <- (max(which(data$PR_dom_res_mort == 0)) + 1):nrow(data)
  }
  for(name in grep('^PR_', colnames(data), value=TRUE)) {
    data[, paste0('min', name)] <- min(data[hist_sample, name])
  }
  
  # One- and two-period lags of total mortgages. head(., -1) shifts the
  # series down by one row, as the other lag columns in this file do; the
  # earlier tail(., -1) left both columns equal to the current value.
  mort_tot <- data$AC_ln_dom_res_mort + data$AC_ln_os_res_mort
  data$bsh_lag1_mort_tot <- c(NA, head(mort_tot, -1))
  data$bsh_lag2_mort_tot <- c(NA, head(data$bsh_lag1_mort_tot, -1))

  # Non-loan asset classes from the bsh_db_* items.
  construct_other_ACs_ING_F <- function(x, data) {
    data$AC_oth_cash <- x$bsh_db_tot_currency_gold + x$bsh_db_deposits_RBA + 
      x$bsh_db_deposits_CHs + x$bsh_db_deposits_nonres
    data$AC_oth_acceptances <- x$bsh_db_acceptances
    data$AC_oth_fixed_n_other_assets <- x$bsh_db_tot_property_equip + 
      x$bsh_db_tot_intangibles + x$bsh_db_tot_other_assets - 
      x$bsh_db_deferred_fees_commissions
    data$AC_oth_intragroup_assets <- x$bsh_db_tot_intragroup_assets
    data$AC_oth_dom_deposits_fincorp <- x$bsh_dom_deposits_ADIs +
      x$bsh_dom_deposits_RFCs + x$bsh_dom_deposits_othFIs
    
    data$AC_sec_dom_ADI <- x$bsh_db_sec_ST_bills + x$bsh_dom_sec_LT_ADIs + 
      x$bsh_dom_sec_ST_ADIs
    data$AC_sec_os_ADI <- 0
    # Corporate securities: the listed ST/LT holdings less the ADI securities
    # assigned above and related-party securities.
    data$AC_sec_corp <- x$bsh_db_sec_ST_community_orgs + 
      x$bsh_db_sec_ST_nonfin_inv_funds + x$bsh_db_sec_ST_oth_nonfin_corps + 
      x$bsh_db_sec_ST_uninc_businesses + x$bsh_db_sec_ST_FIs + 
      x$bsh_db_sec_LT_community_orgs + x$bsh_db_sec_LT_nonfin_inv_funds + 
      x$bsh_db_sec_LT_oth_nonfin_corps + x$bsh_db_sec_LT_uninc_businesses + 
      x$bsh_db_sec_LT_FIs - 
      data$AC_sec_dom_ADI - (x$bsh_db_sec_tot_ST_related_parties + 
                               x$bsh_db_sec_tot_LT_related_parties)
    data$AC_sec_ABS <- 0
    data$AC_sec_equity <- x$bsh_db_sec_hft_equity + x$bsh_db_sec_nft_equity
    
    data$AC_sec_AGS <- x$bsh_db_sec_ST_AGS + x$bsh_db_sec_LT_AGS
    data$AC_sec_semis <- x$bsh_db_sec_ST_semis + x$bsh_db_sec_LT_semis
    data$AC_sec_othgov <- x$bsh_db_sec_ST_nonfin_localgovt + 
      x$bsh_db_sec_ST_nonfin_comm_govt + x$bsh_db_sec_LT_nonfin_localgovt + 
      x$bsh_db_sec_LT_nonfin_comm_govt
    
    data$AC_sec_oth_debt <- x$bsh_db_sec_ST_nonres + x$bsh_db_sec_LT_nonres
    
    return(data)
  }
  data <- construct_other_ACs_ING_F(x, data)
 
  # Off-balance-sheet credit lines: total limits, undrawn amount, and the
  # assumed drawdown (zero unless pmtrs$credit_line_drawdown is TRUE).
  data$business_credit_limits <- (x$off_bsh_credit_line_cards + 
                                    x$off_bsh_credit_line_margin + 
                                    x$off_bsh_credit_line_oth) 
  data$business_credit_limits_available <- data$business_credit_limits - 
    (x$off_bsh_credit_line_cards_used + x$off_bsh_credit_line_margin_used + 
       x$off_bsh_credit_line_oth_used)
  if(pmtrs$credit_line_drawdown){
    data$business_credit_drawdowns <- data$business_credit_limits_available * 
      pmtrs$credit_line_drawdown_rate
  } else{
    data$business_credit_drawdowns <- data$business_credit_limits_available * 0
  }
  
  # Drawdowns are added to domestic business loans.
  data$AC_ln_dom_business <- data$AC_ln_dom_business + data$business_credit_drawdowns
  
  data$bsh_total_equity <- x$bsh_dom_equity
  
  data$total_prvn_from_forms <- x$bsh_dom_tot_indprov + x$bsh_dom_tot_colprov
  
  # Reconciliation: reported total loans minus the classes built above
  # (drawdowns backed out of business loans).
  test <- x$bsh_total_loans_db -
    (data$AC_ln_dom_res_mort + data$PR_dom_res_mort +
       data$AC_ln_creditcards + data$PR_creditcards +
       data$AC_ln_dom_oth_personal + data$PR_dom_oth_personal +
       data$AC_ln_dom_sov + data$PR_dom_sov +
       data$AC_ln_dom_business - data$business_credit_drawdowns + 
       data$PR_dom_business + data$AC_ln_dom_fincorp + 
       data$PR_dom_fincorp + data$AC_ln_os_oth_loans + data$PR_os_oth_loans)
  # Sum of absolute gaps, ignoring NA, as in construct_loan_ACs_and_prvns_F:
  # offsetting errors no longer cancel and an NA gap no longer makes the
  # if() itself fail.
  if(sum(abs(test), na.rm=TRUE) > 1) warning('Asset classes not summing to proper totals (ING)')
  
  return(data)
}
###======================================================================================
# This function is used in variable_mapping_F
contruct_loan_PDs_LGDs_F <- function(x, data) {
  # Adds exposure-weighted PD and LGD columns for the loan asset classes.
  #   x    : table holding the CLM_* items (exposures, PDs and LGDs).
  #   data : table of model variables; one PD_ln_* and one LGD_ln_* column is
  #          pre-created (as NA) for every AC_ln_* column it already contains.
  # Returns `data` with the PD_ln_* / LGD_ln_* columns divided by 100 and, for
  # each of them, a 'min<name>' column holding the second-smallest value found
  # in the last 16 rows (NAs sorted last).

  col_names <- colnames(data)
  loan_ACs <- col_names[grepl('^AC_ln_', col_names)]
  newvars <- c(sub('AC_', 'PD_', loan_ACs), sub('AC_', 'LGD_', loan_ACs))
  data[, newvars] <- NA

  # Exposure weights (on- plus off-balance-sheet) shared by the PD and LGD
  # weighted averages
  corp_exp <- x$CLM_corp_on + x$CLM_corp_off
  sme_corp_exp <- x$CLM_sme_corp_on + x$CLM_sme_corp_off
  business_exp <- x$CLM_corp_on + x$CLM_corp_off + x$CLM_sme_corp_on +
    x$CLM_sme_corp_off + x$CLM_sme_retail_on + x$CLM_sme_retail_off
  res_mort_exp <- x$CLM_res_mort_on + x$CLM_res_mort_off
  rev_credit_exp <- x$CLM_rev_credit_on + x$CLM_rev_credit_off
  oth_retail_exp <- x$CLM_oth_retail_on + x$CLM_oth_retail_off

  # --- PDs ---
  business_pd <- x$CLM_pd_corp * corp_exp +
    x$CLM_pd_sme * sme_corp_exp +
    x$CLM_pd_sme_retail_on * x$CLM_sme_retail_on +
    x$CLM_pd_sme_retail_off * x$CLM_sme_retail_off
  data$PD_ln_dom_business <- business_pd / business_exp
  res_mort_pd <- x$CLM_pd_res_mort_on * x$CLM_res_mort_on +
    x$CLM_pd_res_mort_off * x$CLM_res_mort_off
  data$PD_ln_dom_res_mort <- res_mort_pd / res_mort_exp
  rev_credit_pd <- x$CLM_pd_rev_credit_on * x$CLM_rev_credit_on +
    x$CLM_pd_rev_credit_off * x$CLM_rev_credit_off
  data$PD_ln_creditcards <- rev_credit_pd / rev_credit_exp
  oth_retail_pd <- x$CLM_pd_oth_retail_on * x$CLM_oth_retail_on +
    x$CLM_pd_oth_retail_off * x$CLM_oth_retail_off
  data$PD_ln_dom_oth_personal <- oth_retail_pd / oth_retail_exp
  data$PD_ln_dom_sov <- x$CLM_pd_sov
  data$PD_ln_dom_cp <- x$CLM_pd_spec_lend
  data$PD_ln_os_cp <- x$CLM_pd_spec_lend
  # Overseas mortgages reuse the domestic mortgage PD
  data$PD_ln_os_res_mort <- data$PD_ln_dom_res_mort
  data$PD_ln_dom_fincorp <- x$CLM_pd_fin_corp

  # --- LGDs (same weighting scheme as the PDs) ---
  business_lgd <- x$CLM_lgd_corp * corp_exp +
    x$CLM_lgd_sme * sme_corp_exp +
    x$CLM_lgd_sme_retail_on * x$CLM_sme_retail_on +
    x$CLM_lgd_sme_retail_off * x$CLM_sme_retail_off
  data$LGD_ln_dom_business <- business_lgd / business_exp
  res_mort_lgd <- x$CLM_lgd_res_mort_on * x$CLM_res_mort_on +
    x$CLM_lgd_res_mort_off * x$CLM_res_mort_off
  data$LGD_ln_dom_res_mort <- res_mort_lgd / res_mort_exp
  rev_credit_lgd <- x$CLM_lgd_rev_credit_on * x$CLM_rev_credit_on +
    x$CLM_lgd_rev_credit_off * x$CLM_rev_credit_off
  data$LGD_ln_creditcards <- rev_credit_lgd / rev_credit_exp
  oth_retail_lgd <- x$CLM_lgd_oth_retail_on * x$CLM_oth_retail_on +
    x$CLM_lgd_oth_retail_off * x$CLM_oth_retail_off
  data$LGD_ln_dom_oth_personal <- oth_retail_lgd / oth_retail_exp
  data$LGD_ln_dom_sov <- x$CLM_lgd_sov
  data$LGD_ln_dom_cp <- x$CLM_lgd_spec_lend
  data$LGD_ln_os_cp <- x$CLM_lgd_spec_lend
  # Overseas mortgages reuse the domestic mortgage LGD
  data$LGD_ln_os_res_mort <- data$LGD_ln_dom_res_mort
  data$LGD_ln_dom_fincorp <- x$CLM_lgd_fin_corp

  # Treat NaN (0/0 weights) and exact zeros in the pre-created columns as missing
  data[, newvars] <- apply(data[, newvars], 2,
                           function(v) ifelse(is.nan(v) | v == 0, NA, v))

  # Rescale every PD/LGD column by 1/100
  is_rate <- grepl('^PD_ln_|^LGD_ln_', colnames(data))
  data[, is_rate] <- data[, is_rate]/100

  # Second-smallest value over the most recent `hist` rows, one column per rate
  hist <- 16
  for(name in colnames(data)[is_rate]) {
    recent <- tail(data[, name], hist)
    data[, paste0('min', name)] <- sort(recent, na.last=TRUE)[2]
  }
  return(data)
}
###======================================================================================
###======================================================================================
###======================================================================================
# Top level function
generate_LVR_inputs_F <- 
  function(ST_start_values, pmtrs, sec_query, bankname_key, 
           mort_pd_lgd_correls, secdata_update=FALSE, show_comparison=FALSE) {
    # Builds the per-bank LVR inputs from securitisation data.
    #   ST_start_values     : list of per-bank start values, iterated in parallel
    #                         with pmtrs$banks_list.
    #   pmtrs               : parameter list; ST_date_C and banks_list are read.
    #   sec_query           : query text passed on to securitisation_data_F.
    #   bankname_key        : lookup passed on to clean_and_format_secdata_F.
    #   mort_pd_lgd_correls : table with columns `multiplier` and `LVR`, used to
    #                         fit a quadratic multiplier ~ LVR regression.
    #   secdata_update      : if TRUE, rebuild the LVR distributions even when a
    #                         cached CSV for this date exists.
    #   show_comparison     : if TRUE, print the SAFFI/securitisation comparison
    #                         tables.
    # Returns a list with one element per bank, as produced by 
    # format_LVR_inputs_F.
    
    # file.path() keeps the cache location valid on every OS; the previous
    # hard-coded "\\" separator only addressed the folder on Windows.
    cache_dir <- 'Securitisation_data_inputs'
    filename <- file.path(cache_dir, 
                          paste0('lvr_distributions_', pmtrs$ST_date_C, '.csv'))
    if(!file.exists(filename) || secdata_update) {
      secdata <- securitisation_data_F(month = pmtrs$ST_date_C, query_file = sec_query)
      secdata <- aggregate_securitisation_to_property_level_F(secdata)
      secdata <- clean_and_format_secdata_F(secdata, pmtrs, bankname_key)
      lvr_distributions <- generate_lvr_distribution_F(secdata)
      dir.create(cache_dir, showWarnings=FALSE)
      write.csv(lvr_distributions, file=filename, row.names=FALSE)
    } else {
      lvr_distributions <- read.csv(filename, stringsAsFactors=FALSE) %>% data.table
    }
    
    # Rough coverage check of the securitisation data against the SAFFI balances
    saffi_sec_comparisons_L <- 
      mapply(saffi_sec_comparison_F, pmtrs$banks_list, ST_start_values, 
             MoreArgs=list(lvr_distributions), SIMPLIFY=FALSE)
    secdata_coverage <- saffi_sec_comparisons_L %>% 
      sapply(function(x) sum(x$sec_orig_values)/sum(x$saf_orig_values)) %>% round(2)
    print('Proportion of SAFFI data captured in securitisation data (roughly):')
    print(secdata_coverage)
    if(show_comparison) {
      print('---------------------------------------------------------------------------')
      print('Comparison tables:')
      print(saffi_sec_comparisons_L)
    }
    
    # Quadratic fit of the multiplier on LVR, evaluated per bank downstream
    mult_fit <- lm(multiplier ~ poly(LVR, 2), data=mort_pd_lgd_correls)
    LVR_data_L <- mapply(format_LVR_inputs_F, pmtrs$banks_list, ST_start_values, 
                         MoreArgs=list(lvr_distributions, mult_fit), SIMPLIFY=FALSE)
    return(LVR_data_L)
  }
###======================================================================================
# Called by generate_LVR_inputs_F
securitisation_data_F <- function(month, query_file) {
  # Pulls the securitisation loans for `month` and attaches updated property
  # values and offset-adjusted balances.
  #   month      : reporting month, forwarded to the pull and price-change steps.
  #   query_file : query text forwarded to pull_securitisation_data_F.
  # Returns a keyed data.table with the extra columns property_value_updated,
  # offset_account_balance (absolute value, NA -> 0) and cur_balance_w_offset.
  loans <- pull_securitisation_data_F(month, query_file)
  price_changes_dt <- corelogic_price_changes_F(loans, month)
  cli::cli_alert("Updating property values in securitisation data")

  price_join_cols <- c("property_postcode", "property_type", "valuation_month")
  updated <- merge(loans, price_changes_dt, all.x = TRUE, by = price_join_cols)

  # Scale the latest valuation by the price change since the valuation month
  updated[, property_value_updated := most_recent_property_value * price_change]
  # Offsets are stored as magnitudes; a missing offset counts as zero
  updated[, offset_account_balance := 
            data.table::fcoalesce(abs(offset_account_balance), 0)]
  updated[, cur_balance_w_offset := current_balance - offset_account_balance]

  data.table::setkeyv(updated, c("group_id", "loan_id", "current_balance", 
                                 price_join_cols))
  cli::cli_alert_success("Property values updated")
  cli::cli_alert_success("Securitisation data ready")
  updated
}
###======================================================================================
# Called by securitisation_data_F
pull_securitisation_data_F <- function(month, query_file) {
  # Runs the securitisation query for `month` and tidies the result.
  #   month      : value appended (single-quoted) to the end of the query text.
  #   query_file : character vector of query lines, joined with newlines.
  # Returns a keyed data.table in which
  #   * rows with a missing/zero latest valuation, or with a valuation month
  #     that is missing or later than the report month, fall back to the
  #     origination month and original property value;
  #   * property_type codes 1 / 2 are recoded to "H" / "U" (anything else NA);
  #   * group_id is "<deal_id>-<group_loan_id>", or "<deal_id>-<loan_id>" when
  #     group_loan_id is missing.
  con <- DBI::dbConnect(odbc::odbc(),) #removed connection information 
  # Release the connection however the function exits (it was never closed)
  on.exit(DBI::dbDisconnect(con), add = TRUE)
  # NOTE(review): `month` is pasted straight into the SQL text; this assumes it
  # only ever comes from trusted configuration - confirm.
  sec_query <- paste0(paste(query_file, collapse="\n"), "'", month, "';")
  cli::cli_alert("Pulling securitisation data for {.field {month}}")
  sec_dt <- DBI::dbGetQuery(con, sec_query) %>% 
    data.table::setDT() %>% 
    .[most_recent_property_value == 0 | is.na(most_recent_property_value), ":=" (
      valuation_month = origination_month,
      most_recent_property_value = original_property_value 
    )] %>% 
    .[is.na(valuation_month) | valuation_month > report_month, ":=" (
      valuation_month = origination_month,
      most_recent_property_value = original_property_value
    )] %>% 
    .[, property_type := data.table::fcase(property_type == 1L, "H",
                                           property_type == 2L, "U",
                                           default = NA_character_)
    ] %>% 
    .[is.na(group_loan_id), group_id := paste0(deal_id, "-", loan_id)] %>% 
    .[!is.na(group_loan_id), group_id := paste0(deal_id, "-", group_loan_id)] %>% 
    data.table::setkeyv(
      c("group_id", "loan_id", "current_balance", 
        "property_postcode", "property_type", "valuation_month"))
  cli::cli_alert_success("Securitisation data retrieved")
  sec_dt
  
}
###======================================================================================
# Called by securitisation_data_F
corelogic_price_changes_F <- function(sec_data, month) {
  # Computes, for every (postcode, property type, valuation month) combination
  # present in `sec_data`, the ratio of the price index at `month` to the index
  # at the valuation month.
  #   sec_data : data.table with property_postcode, property_type and
  #              valuation_month columns.
  #   month    : reference date matched against the index's ref_date.
  # Returns a data.table keyed by those three columns with one extra column,
  # price_change. Postcodes mapping to several SA3 regions get the RATIO-weighted
  # average over the regions that have a non-missing price change.
  
  # Helper: pulls the index data and reads the postcode -> SA3 mapping workbook
  corelogic_sa3_data_F <- function() {
    cli::cli_alert("Pulling corelogic data")
    cl_channel <- DBI::dbConnect(odbc::odbc(),) ## removed connection information 
    # Release the connection however the helper exits (it was never closed)
    on.exit(DBI::dbDisconnect(cl_channel), add = TRUE)
    corelogic <- 
      DBI::dbGetQuery(cl_channel, 
                      paste()) %>% ## removed query information
      data.table::data.table() %>%
      .[order(metric_name, r_sa3, property_type, ref_date), ]
    postcode_sa3_mapping <- readxl::read_xlsx("abs_postcode_mapping.xlsx", 
                                              sheet = "Table 3", range = "B6:F3482") %>% 
      data.table::data.table() %>%
      .[-1, c("POSTCODE_2017", "SA3_CODE_2016", "SA3_NAME_2016", "RATIO")] %>%
      .[, POSTCODE_2017 := as.integer(POSTCODE_2017)] %>% 
      .[, SA3_CODE_2016 := as.integer(SA3_CODE_2016)]
    cli::cli_alert_success("Corelogic data retrieved")
    list(corelogic = corelogic, postcode_sa3_mapping = postcode_sa3_mapping)
  }
  result <- corelogic_sa3_data_F()
  corelogic_dt <- result$corelogic
  postcode_sa3_mapping <- result$postcode_sa3_mapping
  cli::cli_alert("Constructing price changes from corelogic data")
  
  # Distinct, complete combinations that need a price change
  postcode_date_combinations <- sec_data[
    , unique(.SD), .SDcols = c("property_postcode", "property_type", "valuation_month")] %>% 
    na.omit() %>% 
    data.table::setkey()
  
  price_update_dt <- postcode_date_combinations %>% 
    # one row per (postcode, SA3) pair; a postcode can span several SA3 regions
    merge(postcode_sa3_mapping, 
          by.x = "property_postcode", by.y = "POSTCODE_2017",
          all.x = TRUE, allow.cartesian = TRUE) %>% 
    # index level at the valuation month
    merge(corelogic_dt, all.x = TRUE,
          by.x = c("property_type", "SA3_CODE_2016", "valuation_month"), 
          by.y = c("property_type", "r_sa3", "ref_date")) %>% 
    .[, metric_name := NULL] %>% 
    data.table::setnames("calculation_value", "index_at_valuation") %>% 
    # index level at the reference month
    merge(corelogic_dt[ref_date == month], all.x = TRUE,
          by.x = c("property_type", "SA3_CODE_2016"), 
          by.y = c("property_type", "r_sa3")) %>% 
    .[, metric_name := NULL] %>% 
    .[, ref_date := NULL] %>% 
    data.table::setnames("calculation_value", "index_current") %>% 
    .[, price_change := index_current / index_at_valuation] %>% 
    # RATIO-weighted mean; weights of rows without a price change are excluded
    .[, 
      list(price_change = sum(price_change * RATIO, na.rm = TRUE) / 
             sum(RATIO * (!is.na(price_change) & !is.nan(price_change)), na.rm = TRUE)
      ), keyby = c("property_postcode", "property_type", "valuation_month")]
  cli::cli_alert_success("Price changes constructed")
  price_update_dt
}
###======================================================================================
# Called by generate_LVR_inputs_F
aggregate_securitisation_to_property_level_F <- function(sec_data) {
  # Collapses loan-level rows to one row per
  # (group_id, report_month, most_recent_property_value, valuation_month).
  # Only rows with a positive, non-missing property_value_updated are kept.
  # Balances are summed within each group, the first row of each group is
  # retained, and lvr_updated = balance_offset_adjusted / property_value_updated
  # is added.
  property_key <- c("group_id", "report_month", "most_recent_property_value", 
                    "valuation_month")
  
  # Subsetting yields a new table, so the caller's data is not modified below
  valued <- sec_data[!is.na(property_value_updated) & property_value_updated > 0]
  valued[, ":=" (
    balance_offset_adjusted = sum(cur_balance_w_offset, na.rm = TRUE), 
    balance = sum(current_balance, na.rm = TRUE)
  ), by = property_key]
  
  per_property <- valued[, head(.SD, n = 1), by = property_key]
  per_property[, lvr_updated := balance_offset_adjusted / property_value_updated]
}
###======================================================================================
# Called by generate_LVR_inputs_F
clean_and_format_secdata_F <- function(secdata, pmtrs, bankname_key) {
  # Maps sponsors to model bank names, filters the property-level data and adds
  # integer LVR buckets.
  #   secdata      : property-level securitisation data (needs sponsor,
  #                  lvr_updated, original_ltv and the columns selected below).
  #   pmtrs        : parameter list; banks_list selects the banks to keep.
  #   bankname_key : data.table with columns securitisation_data (sponsor name)
  #                  and stress_test_model (model bank name).
  # Returns a data.table restricted to banks in pmtrs$banks_list with
  # 0 < lvr_updated <= 2.5, plus lvr_crnt_bucket = floor(lvr_updated * 100) and
  # lvr_orig_bucket = floor(original_ltv). Warns if a requested bank has no rows.
  y <- secdata %>% data.table %>% 
    .[, bank := bankname_key[match(.[, sponsor], securitisation_data), 
                             stress_test_model]] %>%
    .[!is.na(lvr_updated) & !is.na(original_ltv), ] %>%
    .[bank %in% pmtrs$banks_list, ] %>%
    .[lvr_updated > 0 & lvr_updated <= 2.5] %>%
    .[, c('bank', 'cur_balance_w_offset', 'current_balance', 
          'property_type', 'property_postcode',
          'original_property_value', 'origination_month', 'original_ltv', 'seasoning',
          'property_value_updated', 'lvr_updated')] %>%
    .[, c('lvr_crnt_bucket', 'lvr_orig_bucket') := 
        list(as.integer(floor(lvr_updated*100)), as.integer(floor(original_ltv)))]
  leftout_banks <- pmtrs$banks_list[!pmtrs$banks_list %in% unique(y$bank)]
  # Compare the length itself; the old `length(leftout_banks > 0)` only worked
  # because a non-zero length happens to be truthy.
  if(length(leftout_banks) > 0) {
    warning(paste(paste(leftout_banks, collapse=' '), 
                  'are missing from the securitisation data.'))
  }
  return(y)
}
###======================================================================================
# Called by generate_LVR_inputs_F
generate_lvr_distribution_F <- function(secdata) {
  # Builds, per bank, the balance held in every 1-point LVR bucket from 0 to 250.
  #   secdata : data.table with bank, cur_balance_w_offset, lvr_crnt_bucket and
  #             lvr_orig_bucket columns.
  # Returns a data.table with columns bank, lvr (bucket / 100, rounded to 2 dp),
  # value_crnt_lvr and value_orig_lvr; buckets without loans hold 0.
  lvr_crnt_values <- secdata %>% 
    .[, .(value_crnt_lvr = sum(cur_balance_w_offset)), 
      by=list(bank, lvr_crnt_bucket)]
  lvr_orig_values <- secdata %>% 
    .[, .(value_orig_lvr = sum(cur_balance_w_offset)), 
      by=list(bank, lvr_orig_bucket)]
  
  # stringsAsFactors=FALSE: keep `bank` in its original type instead of relying
  # on the join to undo expand.grid's default factor conversion.
  bucket_grid <- expand.grid(lvr=as.integer(0:250), bank=unique(secdata$bank),
                             stringsAsFactors=FALSE) %>%
    data.table %>% .[, c('bank', 'lvr')]
  merged <- bucket_grid %>%
    merge(lvr_crnt_values, by.x=c('bank', 'lvr'), by.y=c('bank', 'lvr_crnt_bucket'), 
          all.x=TRUE) %>%
    merge(lvr_orig_values, by.x=c('bank', 'lvr'), by.y=c('bank', 'lvr_orig_bucket'), 
          all.x=TRUE)
  
  # Only the two value columns can hold NAs (empty buckets). Filling them alone
  # avoids running ifelse() over `bank`, which would turn a factor into its
  # integer codes and break later `bank == <name>` filters.
  zero_if_na <- function(v) ifelse(is.na(v), 0, v)
  y <- merged[, .(bank, 
                  lvr = round(lvr/100, 2),
                  value_crnt_lvr = zero_if_na(value_crnt_lvr),
                  value_orig_lvr = zero_if_na(value_orig_lvr))]
  return(y)
}
# Called by generate_LVR_inputs_F
saffi_sec_comparison_F <- function(ST_bank, saffi, lvr_distributions) {
  # Compares, for one bank, the SAFFI outstanding mortgage balances per LVR
  # bucket with the securitisation balances (by original LVR) in the same
  # buckets.
  #   ST_bank           : bank name used to filter lvr_distributions.
  #   saffi             : table holding the bsh_outs_res_mort_LVR* items.
  #   lvr_distributions : data.table with bank, lvr and value_orig_lvr columns.
  # Returns a data.table with one row per bucket: bounds, balances and shares
  # from both sources, and the ratio of the SAFFI share to the securitisation
  # share.
  sec <- lvr_distributions[bank == ST_bank, ][order(lvr), ]
  
  saf_values <- 
    unlist(saffi[, c('bsh_outs_res_mort_LVR0to60', 'bsh_outs_res_mort_LVR60to80',
                     'bsh_outs_res_mort_LVR80to85', 'bsh_outs_res_mort_LVR85to90',
                     'bsh_outs_res_mort_LVR90to95', 
                     'bsh_outs_res_mort_LVR95up')])
  wide_bkts <- data.table(lvr_min=c(0, 0.6, 0.8, 0.85, 0.9, 0.95),
                          lvr_max=c(0.59, 0.79, 0.84, 0.89, 0.94, Inf))
  wide_bkts[, saf_orig_values := saf_values]
  wide_bkts[, saf_prpns := saf_orig_values/sum(saf_orig_values)]
  
  # Securitisation balance falling inside each [lvr_min, lvr_max] bucket
  wide_bkts$sec_orig_values <- 
    mapply(function(bkt_lo, bkt_hi) sum(sec[lvr >= bkt_lo & lvr <= bkt_hi, 
                                            value_orig_lvr]),
           wide_bkts$lvr_min, wide_bkts$lvr_max)
  wide_bkts[, sec_prpns := sec_orig_values/sum(sec_orig_values)]
  wide_bkts[, saf_prpn_over_sec_prpn := saf_prpns/sec_prpns]
  return(wide_bkts)
}  
###======================================================================================
# Called by generate_LVR_inputs_F
format_LVR_inputs_F <- function(ST_bank, saffi, lvr_distributions, mult_fit) {
  # Builds the LVR input table for one bank.
  #   ST_bank           : bank name used to filter lvr_distributions.
  #   saffi             : table holding the bsh_new_res_mort_LVR* items.
  #   lvr_distributions : data.table with bank, lvr, value_crnt_lvr and
  #                       value_orig_lvr columns.
  #   mult_fit          : fitted model whose predict() takes a column named LVR.
  # Returns a data.table with columns
  #   LVR           : bucket value,
  #   lvr_dist      : share of current-LVR balances in the bucket,
  #   lvr_flow_dist : original-LVR share scaled by the SAFFI / securitisation
  #                   share ratio of the wide bucket containing it,
  #   multiplier    : predicted multiplier, rescaled so that
  #                   sum(multiplier * lvr_dist) equals 1.
  
  sec <- lvr_distributions[bank == ST_bank, ] %>% .[order(lvr), ]
  wide_bkts <- data.table(lvr_min=c(0, 0.6, 0.8, 0.81, 0.85, 0.9, 0.95),
                          lvr_max=c(0.59, 0.79, 0.8, 0.84, 0.89, 0.94, Inf)) %>%
    .[, saf_new_values := 
        unlist(saffi[, c('bsh_new_res_mort_LVR0to60', 'bsh_new_res_mort_LVR60to80',
                         'bsh_new_res_mort_LVR80to81','bsh_new_res_mort_LVR81to85', 
                         'bsh_new_res_mort_LVR85to90', 'bsh_new_res_mort_LVR90to95', 
                         'bsh_new_res_mort_LVR95up')])] %>%
    .[, saf_prpns := saf_new_values/sum(saf_new_values)] 
  wide_bkts$sec_orig_values <- 
    apply(wide_bkts, 1, function(x) sum(sec[lvr >= x['lvr_min'] & lvr <= x['lvr_max'], 
                                            value_orig_lvr]))
  wide_bkts[, sec_prpns := sec_orig_values/sum(sec_orig_values)]
  wide_bkts[, ratio := saf_prpns/sec_prpns]
  
  # `sec` is a fresh subset, so adding columns by reference does not touch
  # lvr_distributions
  y <- sec[, lvr_dist := value_crnt_lvr/sum(value_crnt_lvr)]
  y[, lvr_orig_dist := value_orig_lvr/sum(value_orig_lvr)]
  
  # Index of the wide bucket containing each LVR value. vapply() insists on
  # exactly one match per value and copes with an empty table, unlike the
  # former sapply(1:nrow(y), ...) that subset the table once per row.
  bucket_of <- vapply(y$lvr, 
                      function(v) which(wide_bkts$lvr_min <= v & 
                                          wide_bkts$lvr_max >= v),
                      integer(1))
  y[, lvr_flow_dist := lvr_orig_dist * wide_bkts$ratio[bucket_of]]
  y <- y[, c('lvr', 'lvr_dist', 'lvr_flow_dist')]
  
  y[, multiplier := predict(mult_fit, newdata=data.frame(LVR=lvr))]
  y[, multiplier := multiplier/sum(multiplier * lvr_dist)]
  setnames(y, 'lvr', 'LVR')
  # NOTE(review): the previous code piped this table into data.frame() but never
  # assigned the result, so callers have always received a data.table. The
  # unused conversion was dropped and the return type left unchanged - confirm
  # whether a data.frame was actually intended.
  return(y)
}  
