## --- EM Algorithm --- 
## Implement Stock & Watson (2002) EM algorithm to handle missing data
## Author: P Hendy

# Data behind the missing-data chart: for each Date, the percentage of rows in
# df_graph_stationary whose `value` is observed (i.e. not NA).
missing <- df_graph_stationary %>%
  mutate(ismissing = if_else(is.na(value), 1, 0)) %>%
  group_by(Date) %>%
  summarise(propthere = 100 - 100 * mean(ismissing))

# Same availability measure, split into Advanced Economy (AE) vs Emerging
# Market (EM) series.
#
# Each row is labelled via the `variable` column, the share of missing values
# is averaged per (Date, isem), and the result is rescaled so that a group with
# no missing data contributes its full weight (68.25397 for AE, 31.74603 for
# EM; the two weights sum to 100).
# NOTE(review): the weights are presumably each group's share of the total
# number of series (they match 43/63 and 20/63) -- confirm, and update them if
# the variable lists change.
# NOTE(review): a row is tagged "Emerging Market" when its variable is NOT in
# other_em_variables, or is in china_variables. The negation looks surprising
# given the list name -- confirm against where other_em_variables is defined.
missing_em <- df_graph_stationary %>%
  mutate(
    ismissing = if_else(is.na(value), 1, 0),
    isem = if_else(
      variable %in% china_variables | !(variable %in% other_em_variables),
      "Emerging Market",
      "Advanced Economy"
    )
  ) %>%
  group_by(Date, isem) %>%
  summarise(propthere = mean(ismissing)) %>%
  mutate(
    propthere = if_else(
      isem == "Emerging Market",
      31.74603 - propthere * 31.74603,
      68.25397 - propthere * 68.25397
    )
  )

# Convert to a factor only after summarising, so the grouped sort order above
# is still the alphabetical order of the character labels.
missing_em[["isem"]] <- factor(
  missing_em[["isem"]],
  levels = c("Emerging Market", "Advanced Economy")
)

# Graph 1: export the AE/EM availability series in wide form (one column per
# group). The "Emerging Market" column is overwritten with AE + EM, i.e. the
# cumulative total, before writing. write.csv is called with its defaults, so
# row names are written as the first column.
missing_em %>%
  pivot_wider(names_from = isem, values_from = propthere) %>%
  mutate(`Emerging Market` = `Advanced Economy` + `Emerging Market`) %>%
  write.csv(file = paste0(data_output_folder, "Graph 1.csv"))

# Impute missing observations with iterative PCA (missMDA::imputePCA).
#
# The Date column is dropped before imputation, the remaining columns are
# imputed using 2 principal components on scaled data, and Date is then
# re-attached as the first column of the completed data frame.
# Note: method = "Regularized" requests missMDA's regularised iterative PCA;
# imputePCA also offers method = "EM" for the unregularised variant.
df_global_qtr_stationary_nomiss <- missMDA::imputePCA(
  df_global_qtr_stationary %>% dplyr::select(-Date),
  ncp = 2,
  scale = TRUE, # spelled out: `T` is an ordinary variable and can be reassigned
  method = "Regularized"
)$completeObs %>%
  as.data.frame() %>%
  # Add Date by name directly rather than cbind()-ing it and renaming the
  # auto-generated column, which depended on the exact deparsed expression.
  dplyr::mutate(Date = as.Date(df_global_qtr_stationary$Date)) %>%
  dplyr::select(Date, everything())
