/*******************************************************************************

	2_clean.do
	
	This file contains code to clean the WAD data and create some basic
	variables used in the analysis in Bishop J and I Chan (2019), Is Declining
	Union Membership Contributing to Low Wages Growth?, RBA Research Discussion
	Paper No 2019-02.
	
*******************************************************************************/

* Session setup: turn off output paging and start from an empty session
set more off
clear all

* Working directory: replace the placeholder below with the path to the
* data folder before running
local dir  "<path for 'data' folder here>"
cd "`dir'"

/*********************************************************************************************
 Generate variables
*********************************************************************************************/	

* Load the raw WAD extract
use wad_extract, clear

* Agreement start date: the earliest of firs_incr, cert_date and comm_date.
* Agreement end date: the earlier of term_date and the later of expi_date
* and last_incr. min() and max() skip missing arguments, so each date is
* missing only when all of its inputs are missing.

gen start_date = min(firs_incr, cert_date, comm_date)
gen end_date   = min(term_date, max(expi_date, last_incr))
format start_date end_date %td

* Merging variables: the quarter and the month that contain start_date

gen surveyq = qofd(start_date)
gen surveym = mofd(start_date)
format surveyq %tq
format surveym %tm

* Upper-case the state string so the matching below does not depend on case.
* The variable name is written out in full: the shortened spelling (state)
* only reached this variable through Stata's variable-abbreviation rule,
* which fails under -set varabbrev off- and would target a different
* variable if one named exactly state were present.
replace states = upper(states)

* Dummies for whether a given state is mentioned anywhere in states.
* This is a substring match, so one agreement can switch on several dummies.

foreach x in NSW VIC QLD SA WA TAS NT ACT TER AUS DK {
	gen state_`x' = strpos(states, "`x'") > 0
}

* Mutually exclusive state code: 1-9 when states is exactly one of the nine
* values below, 10 for any other non-empty value, missing when states is empty.
* NOTE(review): a lone "AUS" or "DK" also falls into code 10, which is
* labelled "Multi state" - confirm that this is intended.
gen     state_code=1  if states=="NSW"
replace state_code=2  if states=="VIC"
replace state_code=3  if states=="QLD"
replace state_code=4  if states=="SA"
replace state_code=5  if states=="WA"
replace state_code=6  if states=="TAS"
replace state_code=7  if states=="NT"
replace state_code=8  if states=="ACT"
replace state_code=9  if states=="TER"
replace state_code=10 if state_code==. & states!=""

label define state_name 1 "NSW" 2 "VIC" 3 "QLD" 4 "SA" 5 "WA" 6 "TAS" 7 "NT" 8 "ACT" 9 "TER" 10 "Multi state"

label values state_code state_name

* ANZSIC divisions, from the anzsic06 codes.
* Each inclusive code range maps to one division number (1-19); codes
* outside every range, and missing codes, leave the division missing.

gen anzsic06_1dig = .
replace anzsic06_1dig = 1  if inrange(anzsic06,  1,  5)
replace anzsic06_1dig = 2  if inrange(anzsic06,  6, 10)
replace anzsic06_1dig = 3  if inrange(anzsic06, 11, 25)
replace anzsic06_1dig = 4  if inrange(anzsic06, 26, 29)
replace anzsic06_1dig = 5  if inrange(anzsic06, 30, 32)
replace anzsic06_1dig = 6  if inrange(anzsic06, 33, 38)
replace anzsic06_1dig = 7  if inrange(anzsic06, 39, 43)
replace anzsic06_1dig = 8  if inrange(anzsic06, 44, 45)
replace anzsic06_1dig = 9  if inrange(anzsic06, 46, 53)
replace anzsic06_1dig = 10 if inrange(anzsic06, 54, 60)
replace anzsic06_1dig = 11 if inrange(anzsic06, 62, 64)
replace anzsic06_1dig = 12 if inrange(anzsic06, 66, 67)
replace anzsic06_1dig = 13 if inrange(anzsic06, 69, 70)
replace anzsic06_1dig = 14 if inrange(anzsic06, 72, 73)
replace anzsic06_1dig = 15 if inrange(anzsic06, 75, 77)
replace anzsic06_1dig = 16 if inrange(anzsic06, 80, 82)
replace anzsic06_1dig = 17 if inrange(anzsic06, 84, 87)
replace anzsic06_1dig = 18 if inrange(anzsic06, 89, 92)
replace anzsic06_1dig = 19 if inrange(anzsic06, 94, 96)

* Division numbers (1-17) from the anzsic93 codes.
* Each inclusive code range, or single code, maps to one division number;
* codes outside every range, and missing codes, leave the division missing.

gen anzsic93_1dig = .
replace anzsic93_1dig = 1  if inrange(anzsic93,  1,  4)
replace anzsic93_1dig = 2  if inrange(anzsic93, 11, 15)
replace anzsic93_1dig = 3  if inrange(anzsic93, 21, 29)
replace anzsic93_1dig = 4  if inrange(anzsic93, 36, 37)
replace anzsic93_1dig = 5  if inrange(anzsic93, 41, 42)
replace anzsic93_1dig = 6  if inrange(anzsic93, 45, 47)
replace anzsic93_1dig = 7  if inrange(anzsic93, 51, 53)
replace anzsic93_1dig = 8  if anzsic93 == 57
replace anzsic93_1dig = 9  if inrange(anzsic93, 61, 67)
replace anzsic93_1dig = 10 if anzsic93 == 71
replace anzsic93_1dig = 11 if inrange(anzsic93, 73, 75)
replace anzsic93_1dig = 12 if inrange(anzsic93, 77, 78)
replace anzsic93_1dig = 13 if inrange(anzsic93, 81, 82)
replace anzsic93_1dig = 14 if anzsic93 == 84
replace anzsic93_1dig = 15 if inrange(anzsic93, 86, 87)
replace anzsic93_1dig = 16 if inrange(anzsic93, 91, 93)
replace anzsic93_1dig = 17 if inrange(anzsic93, 95, 97)

* 1 digit industry classification (based on ANZSIC93 categories).
* Codes 1-12 of anzsic06_1dig carry over unchanged. Codes 13 and 14 are
* collapsed into 12 (property and business services) to get a consistent
* series over time, and codes 15-19 shift down by two to 13-17.

gen anzsic_1dig = anzsic06_1dig
replace anzsic_1dig = 12                if inlist(anzsic06_1dig, 13, 14)
replace anzsic_1dig = anzsic06_1dig - 2 if inrange(anzsic06_1dig, 15, 19)

* Where there is no ANZSIC06-based code, fall back on the ANZSIC93-based one

replace anzsic_1dig = anzsic93_1dig if missing(anzsic_1dig)

label define anzsic 1 "AGR" 2 "MIN" 3 "MAN" 4 "ELE" 5 "CON" 6 "WHO" 7 "RET" 8 "ACC" 9 "TRA" 10 "CCS" 11 "FIN" 12 "PBS" 13 "GOV" 14 "EDU" 15 "HTH" 16 "CRS" 17 "POS"
label values anzsic_1dig anzsic

* Public sector dummy: 1 when sector is "Public", 0 for any other non-empty
* sector, missing when sector is empty

gen public = (sector=="Public") if sector!=""

* Dummy for whether a union represented workers in wage bargaining:
* 1 when unions is non-empty, 0 (the base category, no union
* representation) when it is empty

gen union = !missing(unions)

* Performance pay dummy: 1 when any of the five performance pay fields
* equals "Yes", 0 otherwise

gen performancepay = (performancepay_indiv   =="Yes") | ///
                     (performancepay_group   =="Yes") | ///
                     (performancepay_allemp  =="Yes") | ///
                     (performancepay_unclear =="Yes") | ///
                     (performancepay_team    =="Yes")

* One-off bonus dummies: 1 when the matching field equals "Yes", 0 otherwise

gen bonus_cond   = (oneoffbonus_cond  =="Yes")
gen bonus_uncond = (oneoffbonus_uncond=="Yes")

* Drop the raw string fields that the union, public, performance pay and
* bonus dummies were built from
drop unions sector                                      ///
     performancepay_indiv  performancepay_group         ///
     performancepay_allemp performancepay_unclear       ///
     performancepay_team                                ///
     oneoffbonus_cond      oneoffbonus_uncond

* Shrink storage types where possible, write the cleaned file (overwriting
* any existing copy) and empty the data in memory
compress
save wad_extract_clean, replace
clear

*end of do file
