# Load and clean SME/Large base data ####

## Load business lending data ####

# Read the institution-level history file from `input_path`, keep the
# identifier, allowance and lending columns, and shorten the lending column
# names (select() renames while it selects, preserving the listed order).
business_lending_orig <- read_csv(paste0(input_path, "preliminary_individual_institution_with_back_history.csv")) %>%
  select(institution_name, institution_code, classification, period,
         initial_allowance_rounded, supplementary_allowance_rounded, additional_allowance_rounded,
         funding_allowance_rounded,
         SME_credit = lending_to_small_medium_businesses_SME,
         SME_credit_3mma = lending_to_small_medium_businesses_SME_3mma,
         SME_credit_change_3mma = lending_to_small_medium_businesses_SME_change_in_3mma,
         largebus_credit = lending_to_large_business,
         largebus_credit_3mma = lending_to_large_business_3mma,
         largebus_credit_change_3mma = lending_to_large_business_change_in_3mma,
         SME_credit_adhoc = SME_lending_adhoc,
         largebus_credit_adhoc = large_bus_lending_adhoc) %>%
  # Total business credit = SME + large business. A single missing component
  # counts as zero; the total is NA only when both components are missing.
  # rowSums() computes this for the whole column at once, so no rowwise()
  # pass (one R-level evaluation per row) is needed, and NA_real_ keeps the
  # result numeric even if every row is missing.
  mutate(totalbus_credit = ifelse(is.na(SME_credit) & is.na(largebus_credit),
                                  NA_real_,
                                  rowSums(cbind(SME_credit, largebus_credit), na.rm = TRUE)),
         totalbus_credit_3mma = ifelse(is.na(SME_credit_3mma) & is.na(largebus_credit_3mma),
                                       NA_real_,
                                       rowSums(cbind(SME_credit_3mma, largebus_credit_3mma), na.rm = TRUE)))

# Banks with business lending ##

# One row per institution that has a non-missing total business credit figure
# in at least one period, flagged with business_lending_bank = 1.
business_lending_banks <- business_lending_orig %>%
  filter(!is.na(totalbus_credit)) %>%
  distinct(institution_code) %>%
  mutate(business_lending_bank = 1)

# Ad hoc reporters ##

# One row per institution that supplied an ad hoc SME or large-business
# lending figure in any period, flagged with adhoc_submitter = 1.
# distinct() on the code alone is sufficient: only institution_code and the
# constant flag are kept, so no other columns need to be carried through.
adhoc_reporters <- business_lending_orig %>%
  filter(!is.na(SME_credit_adhoc) | !is.na(largebus_credit_adhoc)) %>%
  distinct(institution_code) %>%
  mutate(adhoc_submitter = 1)

## Base period business credit data

# Base level per institution: the 3-month moving averages from the
# 2020-01-31 row when it exists, otherwise from the 2019-12-31 row.
lending_base <- business_lending_orig %>%
  filter(period == "2019-12-31" | period == "2020-01-31") %>%
  # Latest period first, so distinct() keeps the January row over December.
  arrange(desc(period)) %>%
  distinct(institution_code, .keep_all = TRUE) %>%
  select(institution_code,
         SME_credit_base = SME_credit_3mma,
         largebus_credit_base = largebus_credit_3mma,
         totalbus_credit_base = totalbus_credit_3mma)

## Calculate growth rates from base period

# Attach the base-period levels and the two institution-level flags to every
# row of the lending history, matching on institution_code.
business_lending <- business_lending_orig %>%
  full_join(lending_base, by = "institution_code") %>%
  left_join(adhoc_reporters, by = "institution_code") %>%
  left_join(business_lending_banks, by = "institution_code")


# Cumulative growth (in per cent) of each credit series relative to its
# base-period level, for both the raw series and its 3-month moving average.
business_lending_prebind <- business_lending %>%
  mutate(
    # 1 for periods on or after 2020-03-31, 0 before.
    time_dummy = ifelse(period >= "2020-03-31", 1, 0),
    SME_credit_growth_cum = (SME_credit / SME_credit_base) * 100 - 100,
    largebus_credit_growth_cum = (largebus_credit / largebus_credit_base) * 100 - 100,
    SME_credit_growth_cum_3mma = (SME_credit_3mma / SME_credit_base) * 100 - 100,
    largebus_credit_growth_cum_3mma = (largebus_credit_3mma / largebus_credit_base) * 100 - 100,
    totalbus_credit_growth_cum = (totalbus_credit / totalbus_credit_base) * 100 - 100,
    # Uses the smoothed numerator, consistent with the SME and large-business
    # 3mma growth columns above (previously this repeated the raw series).
    totalbus_credit_growth_cum_3mma = (totalbus_credit_3mma / totalbus_credit_base) * 100 - 100
  )

# SME view of the data: copy the SME series into the generic credit columns
# and tag the rows as SME borrowers.
sme_lending <- business_lending_prebind %>%
  mutate(
    business_credit = SME_credit,
    business_credit_base = SME_credit_base,
    credit_growth_cum = SME_credit_growth_cum,
    credit_growth_cum_3mma = SME_credit_growth_cum_3mma,
    SME_borrower = 1,
    borrower_type = "SME"
  )

# Large-business view of the data: copy the large-business series into the
# generic credit columns and tag the rows as non-SME borrowers.
large_business_lending <- business_lending_prebind %>%
  mutate(
    business_credit = largebus_credit,
    business_credit_base = largebus_credit_base,
    credit_growth_cum = largebus_credit_growth_cum,
    credit_growth_cum_3mma = largebus_credit_growth_cum_3mma,
    SME_borrower = 0,
    borrower_type = "large business"
  )

# Stack the SME and large-business views (SME rows first), order by
# institution name, and add the post-period x SME interaction term.
business_lending <- sme_lending %>%
  bind_rows(large_business_lending) %>%
  arrange(institution_name) %>%
  mutate(time_SME_borrower = time_dummy * SME_borrower)

# Remove two intermediate tables from the workspace.
rm(business_lending_orig, lending_base)

# Merge data set ##

# Add bank characteristics by institution code and name.
# funding_allowance_rounded and business_lending_bank are dropped from the
# characteristics table before joining; presumably because business_lending
# already carries columns with those names -- confirm.
business_lending <- business_lending %>%
  left_join(select(df_bank_characteristics,
                   -funding_allowance_rounded, -business_lending_bank),
            by = c("institution_code", "institution_name"))

# Keep the complete dataset (outliers included) under its own name and write
# it to `output_path`; the file name ends with today's date and missing
# values are written as empty strings.

business_lending_full <- business_lending

write_csv(
  x = business_lending_full,
  file = paste0(output_path, "business_lending_data", Sys.Date(), ".csv"),
  na = ""
)

# Sample for the main regression model: drop the two outlier institutions
# and keep only periods between first_month and last_month (inclusive).
# Conditions separated by commas are combined with AND, as with chained
# filter() calls.

business_lending <- business_lending %>%
  filter(
    institution_code != "Outlier 1",
    institution_code != "Outlier 2",
    period >= first_month,
    period <= last_month
  )

# Join TFF usage

# Only the institution code and the three usage columns are taken from
# tff_usage.
tff_usage_forjoining <- select(tff_usage,
                               institution_code, accessed_tff,
                               drawdown_assets, drawdown_final_millions)

business_lending <- business_lending %>%
  left_join(tff_usage_forjoining, by = "institution_code")
