/*******************************************************************************

	15_HILDA.do
	
	This file contains code that extracts union membership and pay-setting data
	from Department of Social Services; Melbourne Institute of Applied Economic
	and Social Research (2018), The Household, Income and Labour Dynamics in
	Australia (HILDA) Survey, RESTRICTED RELEASE 17 (Waves 1-17). The code
	produces the data for Figure 5 of Bishop J and I Chan (2019), Is Declining
	Union Membership Contributing to Low Wages Growth?, RBA Research Discussion
	Paper No 2019-02, and for the share of employees on awards and the share on
	individual agreements who are union members in Section 3 of the paper.

*******************************************************************************/

* Session setup: disable output paging and start from an empty Stata session
set more off
clear all

* Working directory; fig5.xlsx is written here by the export at the end of the file
local datadir "<path for 'data' folder here>"
cd "`datadir'"

/*******************************************************************************
 A. Import HILDA data from 2009 onwards
*******************************************************************************/
* Wave configuration. HILDA waves are identified by a letter prefix
* (a = wave 1, b = wave 2, ...), and this file maps wave number k to
* calendar year 2000 + k, so wave 9 (prefix i) is 2009.
local firstwave  = 9
local latestwave = 17
local hildadir "<path for HILDA data here>"

local alphalist a b c d e f g h i j k l m n o p q r s t u v w x y z

* Variables to keep (xwaveid and year are always kept)
local varstokeep esempst jbmpays jbmtabs hhwtrp hhwtrpm

* Load each wave, strip the wave prefix, keep the selected variables and store
* the result in a temporary file. The wave letter and the year are both derived
* from the same wave number, so they cannot drift apart when firstwave changes.
local prefix
forval wavenum = `firstwave'/`latestwave' {
	local w : word `wavenum' of `alphalist'

	* Forward slash works on every platform and avoids a backslash next to a macro
	use "`hildadir'/Combined_`w'`latestwave'0u", clear

	renpfix `w'                    // Strip off wave prefix
	gen year = 2000 + `wavenum'    // Create year indicator

	if ("`varstokeep'" != "") {
		local tokeep                                 // empty to-keep list
		foreach var of local varstokeep {            // loop over all selected variables
			capture confirm variable `var', exact    // exact name must exist in current wave (no abbreviations)
			if (!_rc) local tokeep `tokeep' `var'    // mark for inclusion if variable exists
		}
		keep xwaveid year `tokeep'                   // keep selected variables
	}

	* Save as temporary data file and remember the wave for the append step
	tempfile tempdata_`w'
	save "`tempdata_`w''"
	local prefix `prefix' `w'
}

* Append the temporary data files for each wave to create (unbalanced) panel
clear
foreach w of local prefix {
	append using "`tempdata_`w''"
}

* Population weight excluding the 2011 top-up sample: take hhwtrpm, but fall
* back to hhwtrp for years before 2011 when hhwtrpm is missing. Any weight that
* is still system-missing afterwards is set to zero.
gen double wrp_excl11 = cond(year < 2011 & missing(hhwtrpm), hhwtrp, hhwtrpm)
replace wrp_excl11 = 0 if wrp_excl11 == .
label variable wrp_excl11 "Responding person population weight (excludes 2011 top-up)"

* Restrict the sample to employees, i.e. drop employees/employers of their own
* business and the self-employed (everything other than esempst == 1)
drop if esempst != 1


/*******************************************************************************
 B. Calculate share of workers on awards/individual arrangements who are union members
*******************************************************************************/
* Union membership indicator: 1 when jbmtabs equals 1, otherwise 0
* (so "don't know" responses count as non-members)
gen union = jbmtabs == 1

* 2017 union member shares for Section 3 of the RDP, by pay-setting method:
*   jbmpays == 4  award
*   jbmpays == 2  individual arrangement
* Each share is tabulated twice: first with wrp_excl11 (excludes the 2011
* top-up sample, consistent with Fig 5 and used in the paper), then with
* hhwtrp (includes the top-up sample, for comparison).
foreach paycode in 4 2 {
	foreach wt in wrp_excl11 hhwtrp {
		tabulate year union [aw = `wt'] if year == 2017 & `wt' >= 0 & jbmpays == `paycode', row
	}
}


/*******************************************************************************
 C. Calculate share of workers on enterprise agreements who are union members
*******************************************************************************/

* Figure 5 uses only workers whose pay is set exclusively by enterprise
* agreements (jbmpays == 1); everyone else is dropped
drop if jbmpays != 1

* Weighted employee counts by year and union status, named empl directly
collapse (sum) empl = wrp_excl11, by(year union)

* One row per year with empl0 (non-members) and empl1 (members)
reshape wide empl, i(year) j(union)

* Union members as a percentage of all employees on enterprise agreements
tempvar total
gen double `total' = empl0 + empl1
gen double union_sh = empl1 / `total' * 100

* Label the exported columns; the labels become the header row in Excel
label variable year "Year"
label variable union_sh "Percentage of employees (ex self-empl) on EBAs who are union members"

* Export to Excel
export excel year union_sh using fig5.xlsx, ///
	sheet("fig5") cell(A1) sheetreplace firstrow(varl)

* end of do file
