# Title: Build combined data for regressions
# Creator: Calvin He
# Date Created: 01 July 2019

# Build the regression data set: filtered CoreLogic series tagged with a
# `group` column, left-joined to the chosen monetary policy shock by Date.
#
# Args:
#   core_logic_data_raw: long-format CoreLogic data. Columns referenced here:
#     metro (only when opts$metro_only), SA3_CODE_2016, metric_name,
#     property_type, Date and calculation_value.
#   monetary_policy_shocks_raw: container indexed with `[[` by shock type. The
#     selected element must hold a Date column and exactly one column whose
#     name contains "mp_shock".
#   grouping_variables: table keyed by SA3_CODE_2016 holding candidate grouping
#     variables; only used when opts$group_var is neither "price_group" nor
#     "vol".
#   opts: list of options. Fields read: metro_only, group_var, group_no,
#     prop_type, price_group_defn, metric_dependent, start_date, end_date,
#     frequency, metric_deflate, mp_shock_type, mp_shocks_accumulate.
#
# Returns:
#   The filtered CoreLogic rows with a non-missing `group`, plus an `mp_shock`
#   column matched on Date (NA where no shock exists for that Date). When
#   opts$metric_deflate is TRUE the nominal value is kept in
#   `calculation_value_orig`.
build_combined_data <- function(core_logic_data_raw, monetary_policy_shocks_raw, grouping_variables, opts){

  # ---- CoreLogic data and manipulations ----
  corelogic_sa3_data <- core_logic_data_raw

  # apply metro filter
  if (opts$metro_only) {
    corelogic_sa3_data <- corelogic_sa3_data %>% filter(metro == TRUE)
  }

  # attach the variable that regions will be grouped on
  if (opts$group_var == "vol") {
    corelogic_sa3_data <- core_logic_add_volatility_deciles(corelogic_sa3_data,
                                                            prop_type = opts$prop_type)
  } else if (opts$group_var == "price_group") {
    # define price groups
    corelogic_sa3_data <- core_logic_add_price_groups(corelogic_sa3_data,
                                                      group_no = opts$group_no,
                                                      prop_type = opts$prop_type,
                                                      years = opts$price_group_defn)
  } else {
    # any other grouping variable is looked up by SA3 code
    corelogic_sa3_data <- left_join(corelogic_sa3_data,
                                    grouping_variables %>% select(SA3_CODE_2016, opts$group_var),
                                    by = "SA3_CODE_2016")
  }

  # Restrict to the requested metric(s), property type and sample window.
  # The modulo test keeps only months whose number is a multiple of
  # opts$frequency (e.g. 3 keeps March, June, September and December).
  corelogic_sa3_data <- corelogic_sa3_data %>%
    filter(metric_name %in% opts$metric_dependent,
           property_type == opts$prop_type,
           Date >= opts$start_date, Date <= opts$end_date,
           month(Date) %% opts$frequency == 0) %>%
    core_logic_add_groups(opts$group_var, group_no = opts$group_no) %>%  # break group into deciles/quantiles
    filter(!is.na(group))

  if (opts$metric_deflate == TRUE) {
    # deflate_series() is defined elsewhere; presumably it adds
    # `real_calculation_value`, which is read below -- TODO confirm.
    corelogic_sa3_data <- corelogic_sa3_data %>%
      deflate_series("calculation_value") %>%
      mutate(calculation_value_orig = calculation_value,
             # %in% (not ==) so that several dependent metrics are matched the
             # same way as in the filter above instead of being recycled
             # element-wise against the rows.
             calculation_value = ifelse(metric_name %in% opts$metric_dependent,
                                        real_calculation_value, calculation_value))
  }


  # ---- Monetary policy shock data and manipulations ----
  # pick your shock
  shock_source <- monetary_policy_shocks_raw[[opts$mp_shock_type]]
  if (is.null(shock_source)) {
    stop("No monetary policy shock series found for mp_shock_type '",
         opts$mp_shock_type, "'.", call. = FALSE)
  }

  monetary_policy_shock_chosen <- shock_source %>%
    dplyr::select(Date, contains("mp_shock"))

  # The rename below is positional, so it is only meaningful for Date plus a
  # single shock column.
  if (ncol(monetary_policy_shock_chosen) != 2L) {
    stop("Expected 'Date' plus exactly one column containing 'mp_shock' for shock type '",
         opts$mp_shock_type, "', found ", ncol(monetary_policy_shock_chosen),
         " column(s).", call. = FALSE)
  }
  colnames(monetary_policy_shock_chosen) <- c("Date", "mp_shock") # change column names

  # quarterly accumulation (just in case shock chosen is monthly)
  if (opts$mp_shocks_accumulate) {
    # last_day_quarter() is defined elsewhere; presumably it maps each date to
    # its quarter-end date -- TODO confirm. With na.rm = TRUE a quarter whose
    # shocks are all NA sums to 0.
    monetary_policy_shock_chosen <- monetary_policy_shock_chosen %>%
      mutate(Date = last_day_quarter(Date)) %>%
      group_by(Date) %>%
      summarise_all(sum, na.rm = TRUE) %>%
      ungroup()
  }


  # ---- Combine ----
  reg_data_pre <- left_join(corelogic_sa3_data, monetary_policy_shock_chosen, by = "Date")

  return(reg_data_pre)

}


# Add price groups to corelogic data
#
# Regions are ranked by their average median sale price over a reference
# period and split into `group_no` buckets; the result is merged back onto
# every row of `df` by region_name.
#
# Args:
#   df: long-format CoreLogic data with columns property_type, metric_name,
#     Date, region_name and calculation_value.
#   group_no: number of buckets passed to ntile().
#   prop_type: property type used for the reference observations.
#   years: calendar years used for the reference observations.
#     NOTE(review): matched with %in%, so the default c(1990, 1994) selects
#     those two years only, not the range 1990-1994 -- confirm intent.
#   min_volume: regions are kept only when their summed sales volume over the
#     reference period is strictly greater than this value.
#
# Returns:
#   `df` with `average_sales_price` and `price_group` added; both are NA for
#   regions that did not qualify for a group.
core_logic_add_price_groups <- function(df, group_no = 10, prop_type ="D" , years = c(1990,1994), min_volume = 10 ){

  # Observations that define the groups: price and volume metrics for the
  # chosen property type within the reference years
  reference_rows <- filter(df,
                           property_type == prop_type,
                           metric_name %in% c("median_sales_price", "sales_volume"),
                           year(Date) %in% years)

  # One row per region/date with one column per metric
  reference_wide <- dcast(reference_rows, region_name + Date ~ metric_name,
                          value.var = "calculation_value")

  # Region-level average price and total volume, dropping thinly traded
  # regions before ranking so they do not take up a slot in any bucket
  region_price_groups <- reference_wide %>%
    dplyr::group_by(region_name) %>%
    dplyr::summarise(average_sales_price = mean(median_sales_price, na.rm = TRUE),
                     volume_sum = sum(sales_volume, na.rm = TRUE)) %>%
    dplyr::filter(volume_sum > min_volume) %>%
    ungroup() %>%
    dplyr::mutate(price_group = ntile(average_sales_price, group_no))

  # Merge the region-level grouping back onto the full data set
  group_lookup <- region_price_groups %>%
    select(region_name, average_sales_price, price_group)

  left_join(df, group_lookup, by = "region_name")
}


# Add groups based on a grouping variable value
#
# Args:
#   df: data frame containing the column named by `group_var`.
#   group_var: single column name. "price_group" is treated as already
#     bucketed and copied as-is; any other column is split into `group_no`
#     buckets with ntile().
#   group_no: number of buckets (ignored when group_var is "price_group").
#
# Returns:
#   `df` with an added `group` column.
#
# Stops with an informative error when `group_var` is not a single value or
# does not name a column of `df`.
core_logic_add_groups <- function(df, group_var, group_no = 10 ){
  if (length(group_var) != 1L || is.na(group_var)) {
    stop("`group_var` must be a single, non-missing column name.", call. = FALSE)
  }
  # Fail early: a missing column would otherwise make `[[` return NULL and
  # surface as an opaque size error inside mutate().
  if (!group_var %in% names(df)) {
    stop("Column '", group_var, "' not found in `df`; cannot build groups.",
         call. = FALSE)
  }

  if (group_var == "price_group") {
    df %>% mutate(group = price_group)
  } else {
    # Buckets are computed over all rows of `df` at once.
    # NOTE(review): ntile() ignores ties, so rows sharing the same value
    # (e.g. repeated observations of one region) can fall into adjacent
    # groups -- confirm this is acceptable.
    df %>%
      {mutate(., group = ntile(.[[group_var]], group_no))}
  }
}
