/*******************************************************************************

	15_oecd_memcov.do
	
	This file contains code that uses union membership and coverage data from
	the Organisation for Economic Co-operation and Development to produce
	Figure 4 in Bishop J and I Chan (2019), Is Declining Union Membership
	Contributing to Low Wages Growth?, RBA Research Discussion Paper No 2019-02.
	Output will be saved to fig4.xlsx.

	The package labutil is needed for this code to run.
	
*******************************************************************************/

* Session setup: switch off -more- pauses and start from an empty workspace
set more off
clear all

* Location of the 'data' folder; relative file names used below resolve against it
local dir  "<path for 'data' folder here>"
cd "`dir'"

/*******************
 A. Union density
*******************/

* Load the OECD union density extract and keep only the fields used below
import delimited using oecd_uniondensity, clear
keep cou* v6 series year value

* Convert v6 (the data source, per the label assigned below) into a labelled numeric code
encode v6, generate(source)
drop v6
label variable source "Data source (1=admin, 2=survey)"

* Go wide in two steps: one column per series, then one column per series and source
reshape wide value, i(year cou* source) j(series) string
reshape wide value*, i(year cou*) j(source)
sort cou year

* Strip the "value" stub, then give each series a descriptive name
rename value* *
rename TUD* unionden*
rename TUM* unionmem*
rename EEM* empl*

* Rewrite the variable labels as readable text (labvarch comes from the labutil package).
* NOTE(review): the substitutions assume labels of the form "<source code> value<series>",
* presumably as left by the two reshapes above - confirm
labvarch *, subs("valueTUD" "Union density (%)")
labvarch *, subs("valueTUM" "Union members ('000s)")
labvarch *, subs("valueEEM" "Employees ('000s)")

* Drop the leading source code from each label and append the source description instead
local tag1 ", admin data"
local tag2 ", svy data"
forvalues s = 1/2 {
	labvarch *`s', subs("`s' " "")
	labvarch *`s', postfix("`tag`s''")
}

// Integrate survey and admin measures into one series per country. Order of preference:
// a complete admin series, then a complete survey series, then whichever source covers
// more years, with admin winning ties. Availability is always judged on union density.
egen count0 = count(year), by(cou) // Available data years (across all sources)
egen count1 = count(unionden1), by(cou) // Available data years for admin data
egen count2 = count(unionden2), by(cou) // Available data years for survey data

// NOTE(review): each fallback below tests missing(<variable being built>), not
// missing(unionden). When both sources cover every year of a country, empl or unionmem can
// therefore be taken from the survey source in a year where the admin value of that
// variable is missing, even though unionden comes from admin - confirm this is intended.
foreach v in unionden empl unionmem {
	gen `v' = `v'1 if !missing(unionden1) & count1 == count0 // Use all admin data if available for all data years
	replace `v' = `v'2 if missing(`v') & !missing(unionden2) & count2 == count0 // then all survey data if available for all data years
	replace `v' = `v'1 if missing(`v') & !missing(unionden1) & count1 > count2 // then admin data if available for more years than survey data
	replace `v' = `v'2 if missing(`v') & !missing(unionden2) & count1 < count2 // then survey data if available for more years than admin data
	replace `v' = `v'1 if missing(`v') & !missing(unionden1) & count1 == count2 // then admin data if equal availability
}
drop count? // remove the helper counters
label variable unionden "Union density (% of employees), mixed data"
label variable unionmem "Union members ('000s), mixed data"
label variable empl "Employees ('000s), mixed data"

* Keep the country name as text and add a labelled numeric id so the data can be declared a panel
rename country country_name
encode country_name, generate(country)

* Declare the panel and fill it out so missing data-years become easy to detect
tsset country year
tsfill, full
sort country year

* For rows where the identifiers are blank: carry the previous row's value forward,
* then fill anything still blank from the last row of the same country
foreach id in cou country_name {
	by country: replace `id' = `id'[_n-1] if missing(`id')
	by country: replace `id' = `id'[_N] if missing(`id')
}

* Park the result in a temporary file for the merge in section B
tempfile oecd_unionden
save "`oecd_unionden'"


/**********************************
 B. Collective bargaining coverage
**********************************/
 
* Load OECD collective bargaining coverage; keep the country identifiers, year and value
import delimited using oecd_collectivecov, clear
keep cou* year val
rename country country_name
rename val collcov
label variable collcov "Percentage of employees with the right to bargain"

* Attach the union density panel saved in section A, matching on country code and year
merge 1:1 cou year using "`oecd_unionden'", force
drop _merge

* Give the OECD aggregate its own numeric id and value label.
* NOTE(review): any other row present only in the coverage file has no numeric country id
* at this point - presumably there are none; verify against the data
label define country 99 "OECD Total", add
replace country = 99 if country_name == "OECD - Total"

* Fill blank identifiers within country: previous row first, then the country's last row
sort country year
foreach id in cou country_name {
	by country: replace `id' = `id'[_n-1] if missing(`id')
	by country: replace `id' = `id'[_N] if missing(`id')
}

* Fill out the panel again now that coverage rows are included, so missing data-years
* are easy to detect, and repeat the identifier fill for the rows this adds
sort country year
tsset country year
tsfill, full
foreach id in cou country_name {
	by country: replace `id' = `id'[_n-1] if missing(`id')
	by country: replace `id' = `id'[_N] if missing(`id')
}

* Identifiers first, rows in panel order
order country cou country_name year
sort country year

* Estimate OECD average (weighted by employees) for union density.
* Only country-years that have both a density and an employee count enter the two sums,
* so numerator and denominator always cover the same observations. The OECD aggregate
* row (country == 99) is left out so that it cannot be averaged into itself.
gen byte in_wavg = !missing(unionden, empl) & country != 99
egen double a = total(cond(in_wavg, unionden * empl, .)), by(year)
egen double b = total(cond(in_wavg, empl, .)), by(year)
gen unionden_oecd_wavg = a/b if year < 2017 // missing when no country has data in that year (0/0)
drop a b in_wavg
replace unionden = unionden_oecd_wavg if country == 99 // use weighted average
label variable unionden_oecd_wavg "Employee-weighted avge of all OECD countries with available data in given year"

* Park the combined panel in a temporary file
tempfile oecd_all
save "`oecd_all'"


/**************************************************************
 C. Calculate percentage point change and export for Figure 4
**************************************************************/

* Calculate percentage point change for selected countries
keep year cou* collcov unionden
keep if inlist(cou, "AUS", "CAN", "DEU", "FRA", "JPN", "NZL", "GBR", "USA", "OTO")

* Merge in paysetting data for Australia for ex-award measure of collective bargaining
merge m:1 year using paysetting, keepusing(empl_shexomie_col) // Merge in ex-awards measure for Australia
drop _merge
replace empl_shexomie_col = . if cou!="AUS" // the ex-awards measure applies to Australia only
sort cou year
drop if year == 2018

* Copy the value observed in each reference year onto every row of the same country.
* max(cond(...)) returns missing when the reference year has no value, so no "0 means
* missing" sentinel is needed and a true zero would be kept.
* Assumes at most one row per country and year - TODO confirm.
* Only the years used in the differences below are built.
foreach y of numlist 2000 2004 2014 2016 {
	egen collcov_`y' = max(cond(year == `y', collcov, .)), by(cou)
	egen collcovexawd_`y' = max(cond(year == `y', empl_shexomie_col, .)) if cou=="AUS" // Ex-award measure for AUS
	egen unionden_`y' = max(cond(year == `y', unionden, .)), by(cou)
}

* Percentage point changes, 2000 to 2016
gen collcov_ch = collcov_2016 - collcov_2000
replace collcov_ch = collcov_2014 - collcov_2004 if cou=="FRA" // Account for data availability for France
gen collcovexawd_ch = collcovexawd_2016 - collcovexawd_2000
gen unionden_ch = unionden_2016 - unionden_2000
replace unionden_ch = unionden_2014 - unionden_2004 if cou=="FRA" // Account for data availability for France

* Keep only country, year and the change variables, then reduce to the 2016 rows
ds country year *ch, not
drop `r(varlist)'
keep if year == 2016
drop year
order country collcov_ch unionden_ch collcovexawd_ch

* Variable labels become the column headers of the exported sheet (firstrow(varlabels)).
* The ex-awards label is attached to collcovexawd_ch; it must not overwrite the label of collcov_ch.
* NOTE(review): the ex-awards change is computed for Australia only with no France
* adjustment, so the France clause in its label may be unnecessary - confirm wording
label variable collcov_ch "Change in collective agr 2000 to 2016 (pp); 2004 to 2014 for France"
label variable unionden_ch "Change in union density 2000 to 2016 (pp); 2004 to 2014 for France"
label variable collcovexawd_ch "Change in collective agr (ex awards) 2000 to 2016 (pp); 2004 to 2014 for France"

* Export
export excel fig4.xlsx, sheet(fig4) firstrow(varlabels) sheetreplace

* end of do file
