/**************************************************************/
/*  Program created by James Bishop during RBA inposting in  
    April 2016. 
    It cleans the data and creates a single, longitudinal 
	database at the firm level.
***************************************************************/

* Session setup: turn off output paging and move to the data directory
set more off
cd "\\sasnasprd\SASData\W386\RBA\dta_files\empdata"

* Stack the four quarterly files (Mar, Jun, Sep, Dec - in that order) into one dataset
use empdata_Mar, clear
append using empdata_Jun empdata_Sep empdata_Dec

*erase empdata_Mar.dta
*erase empdata_Jun.dta
*erase empdata_Sep.dta
*erase empdata_Dec.dta

* Drop variables that are not needed in the firm-level panel.
* The list is kept in its original order, several names per line.

drop pid cid state_of response delete_f ///
     date_la0 export_c date_la1 import_c usi_sour ///
     usi_feed usi_regi usi_resp usi_dacc usi_esti ///
     usi_sel_ osvbmept ognbmept unitflg offcmt1 ///
     offcmt2 offcmt3 offcmtd rspcmt1 rspcmt2 ///
     rspcmt3 rspcmt4 newunflg editflg person ///
     bundno formtype attflg attcmt musrn ///
     bmempt bmunkind stratno in1_53 qt1stfw ///
     yr1stfw randstrt skip ssstrano ssstrasz ///
     ppsprob runame spmsg ctsyttle nmsrtkey ///
     attnttle addln1 addln2 addln3 addln4 ///
     contname contitle stdcode phoneno stdfax ///
     faxno mobile extsta extdat dispstrm ///
     distrcmt lbuind buflg pgflg rtsflg ///
     rtsdate rtscmt sampflg frmhrs frmmin ///
     seastart seaend i_status frpanel new_stra ///
     totemp_1 seatemp proporti s_abn s_eg_id ///
     s_b_date s_fqtr s_randno s_lb_id s_inds ///
     s_mstate s_state s_anzsic s_mn_emp s_emp ///
     s_tolo s_sisca s_sector s_npi s_log ///
     s_ind17 s_ind53 s_panel s_ltnre s_lpi ///
     email s_anzsic06 s_ind06 s_sisca06 contcmt ///
     contnam2 contitl2 stdcod2 phonen2 stdfax2 ///
     faxno2 mobile2 email2 contcmt2 pgflgalt ///
     acflag formpsi ipsform datemarkin formamended


* CLEAN VARIABLES

* Display formats only (stored values are unchanged): general numeric format,
* 10 characters wide for the listed variables and 15 wide for abn
format %10.0g psi qt1stsmp yr1stsmp mutolo mustianz musianz06 stateest agreejob noqtrnr
format %15.0g abn
	   	       
* Survey year/quarter variables
* survey is a string made of a 3-letter month (Mar/Jun/Sep/Dec) followed by a
* 2-digit year; split it into its two parts
gen  quarter = substr(survey,1,3)
gen  year    = substr(survey,4,2)
drop survey

* Convert the 2-digit year to a 4-digit year.
* 96-99 map to 1996-1999; every other 2-digit value (00-95) maps to 2000-2095.
* The 2000s window previously stopped at 20, which left later surveys with a raw
* 2-digit year and therefore a missing surveyq from yq().
destring year, replace
replace  year=year+1900 if year>=96 & year<=99
replace  year=year+2000 if year>=0  & year<=95

* Month label -> quarter number
replace  quarter="1" if quarter=="Mar"
replace  quarter="2" if quarter=="Jun"
replace  quarter="3" if quarter=="Sep"
replace  quarter="4" if quarter=="Dec"
destring quarter, replace

* Stata quarterly date for the survey period
gen surveyq = yq(year, quarter)
format surveyq %tq

sort unitid surveyq

* Quarter in which the unit was first selected in the sample, as a Stata quarterly date
generate surveystsmp = yq(yr1stsmp, qt1stsmp)
format   %tq surveystsmp
drop     qt1stsmp yr1stsmp

* Copy of unitid, taken before unitid is harmonised below; this is the key
* that will be used to merge to jobdata
generate merge_id = unitid

* Intermediate save of the panel (overwrites any existing file)
save empdata_panel, replace

* unitid naming conventions changed from 2003q3 (moved to AT, MU prefix).
* Recode historical records so that only one unitid relates to any single firm.

* Step 1: build a crosswalk with one row per distinct (old_unit, unitid) pair,
* keyed on the old identifier (stored as unitid) and carrying the newer
* identifier as unitid_future
keep unitid old_unit
drop if old_unit==""
duplicates drop
rename unitid   unitid_future
rename old_unit unitid
save unitid_historic, replace

* Step 2: reload the panel and attach the newer identifier to records that still
* use an old one; crosswalk rows with no matching panel record are discarded
use empdata_panel, clear
merge m:1 unitid using unitid_historic, keep(master match) nogenerate
replace unitid = unitid_future if unitid_future!=""
drop old_dig old_unit unitid_future


* Firm name: join the two name fields with a single space
generate name = name1 + " " + name2
drop name1 name2

* ABN
* Sometimes ABN was only disclosed in a later survey. Fill in the history:
* where a unit only ever reports one distinct ABN (its max equals its min),
* copy that ABN into the records where it is missing.
* Note ABN is not unique and for some firms it changes over time; units with
* more than one ABN are left untouched.
tempvar abn_hi abn_lo
bysort unitid: egen double `abn_hi' = max(abn)
by     unitid: egen double `abn_lo' = min(abn)
replace abn = `abn_hi' if abn==. & `abn_hi'==`abn_lo'
drop `abn_hi' `abn_lo'

* Postcode
* NB: postcode refers to mailing address of payroll, rather than of job
* Blank out placeholder entries, strip any lower-case "a" characters, then
* convert to numeric (destring leaves the variable as a string if any
* non-numeric characters remain)
replace  postcode = "" if inlist(postcode, "?", "XXXX", ".")
replace  postcode = subinstr(postcode, "a", "", .)
destring postcode, replace

* Employment measures

* Temporary flag for records whose employment date (toemdate) is usable:
*   - all three dates present and toemdate lies within [date_loa, date_las], or
*   - date_las missing and toemdate equals date_loa
tempvar emp_current
gen byte `emp_current' = 0
replace  `emp_current' = 1 if toemdate!=. & date_loa!=. & date_las!=. & toemdate>=date_loa & toemdate<=date_las
replace  `emp_current' = 1 if toemdate!=. & date_loa!=. & date_las==. & toemdate==date_loa

* Employment, excluding seasonal and temporary
gen empl = totempee if totempee>=0 & `emp_current'==1
/*
The above measure of employment will only be non-missing in quarters in which employment data are newly updated.
In general, this is the September quarter (see tabulation below). There are some non-missing employment
data in other quarters given that employment is also collected upon initialisation and sometimes in a later quarter
if the firm does not respond to the employment questions in September.
*/
gen empl_nonmiss = (empl!=.)
tab empl_nonmiss quarter

* Employment, including seasonal and temporary
* Use empl + seas_emp where available; otherwise fall back to repempee under the same date conditions
gen     empl_st = empl + seas_emp
replace empl_st = repempee if empl_st==. & totempee>=0 & `emp_current'==1

/*
repempee = totempee + seas_emp

 Prior to 2003q3 instead of asking for the specific number of seasonal/temp employees, we asked a more general
 question about what percentage of the total employees were seasonal/temp (over 10%, up to 10% or Nil). 'seatemp'
 Totempee should always have excluded seasonal/temp employees but it looks like previously we altered the figure
 manually rather than calculating it in the system.
*/

* Remove the helper flag explicitly so it is not carried into later steps
drop `emp_current'

drop totempee repempee seas_emp toemdate empl_nonmiss

drop defnilqt dfnilcmt dateocc nildectd daterisn c_status


* Industry (diagnostic output only; no variables are changed)

* ANZSIC 2006, Division (B-S)
tabulate in1_19
/*
A Agriculture, Forestry and Fishing
B Mining
C Manufacturing
D Electricity, Gas, Water and Waste Services
E Construction
F Wholesale Trade
G Retail Trade
H Accommodation and Food Services
I Transport, Postal and Warehousing
J Information Media and Telecommunications
K Financial and Insurance Services
L Rental, Hiring and Real Estate Services
M Professional, Scientific and Technical Services
N Administrative and Support Services
O Public Administration and Safety
P Education and Training
Q Health Care and Social Assistance
R Arts and Recreation Services
S Other Services
*/

* ANZSIC 1993, Division (A-Q)
tabulate in1_17
/*
A: Agriculture, Forestry and Fishing
B: Mining
C: Manufacturing
D: Electricity, Gas and Water Supply
E: Construction
F: Wholesale Trade
G: Retail Trade
H: Accommodation, Cafes and Restaurants
I: Transport and Storage
J: Communication Services
K: Finance and Insurance
L: Property and Business Services
M: Government Administration and Defence
N: Education
O: Health and Community Services
P: Cultural and Recreational Services
Q: Personal and Other Services
*/


* Prior to 2007q3, only ANZSIC 1993 classifications are available. From 2007q3 onwards, both
* classifications are available. Summarise both code variables quarter by quarter to check
* (this re-sorts the data by surveyq).
bysort surveyq: summarize mustianz musianz06


* SISCA and TOLO (diagnostic output only; no variables are changed)
/*
http://www.abs.gov.au/ausstats/abs@.nsf/Lookup/40F31FE6397B8AA3CA257BDD001163AC?opendocument

Standard Institutional Sector Classification of Australia (SISCA)

 A classification of institutional units which provides a framework for dividing the Australian
 economy into institutional sectors. These sectors group units which have similar economic
 functions and structural characteristics.

Type of Legal Organisation (TOLO)

 The TOLO classification is used to classify institutional units according to the type of legal
 organisation that best describes their structure.
*/

* One frequency table per classification variable
foreach classvar in mutolo musisca musisca06 {
    tabulate `classvar'
}

* Prior to 2007q3, only SISCA 1998 classifications are available. From 2007q3 onwards, both
* classifications are available. Summarise both by quarter to check.
bysort surveyq: summarize musisca musisca06


/*
TOLO

1 Incorporated Private Sector Entities
 11 Proprietary Companies
 12 Public Companies
 13 Other Incorporated Entities
2 Unincorporated Private Sector Entities
 21 Sole Proprietorship
 22 Family Partnerships
 23 Other Partnerships
 24 Trusts Regarded as Corporations
 25 Other Trusts
 26 Other Unincorporated Entities
3 Public Sector Entities
 31 Government Companies
 32 Other Government Entities
 33 Foreign Government Entities
*/


* Completely enumerated (CE) flag - diagnostic output only
* Employment is generally much larger for CE firms: compare the employment
* summary for unflagged records (ceflg blank) with that for flagged records (ceflg "C")
tabulate  ceflg
summarize empl if ceflg==""
summarize empl if ceflg=="C"


* Give all firm-level variables an f_ prefix.
* surveyq and merge_id are the panel/merge keys and keep their names, so they
* are removed from the list of variables to rename.
unab  allvars : _all
local keys surveyq merge_id
local firmvars : list allvars - keys

foreach v of local firmvars {
    rename `v' f_`v'
}

* Drop all remaining non-crucial variables
drop f_surveystsmp f_quarter f_year f_chkdigit f_locality

* Move the main variables to the front of the dataset in a fixed order
order f_unitid surveyq f_name ///
      f_empl f_empl_st ///
      f_stateest f_ceflg f_multenti f_agreejob ///
      f_mustianz f_musianz06 f_in1_19 f_in1_17 ///
      f_musisca f_musisca06 f_mutolo ///
      f_abn f_state f_postcode f_psi ///
      f_date_loa f_date_las f_noqtrnr

* Sort on the merge key and survey quarter, then overwrite the panel file
sort merge_id surveyq

save empdata_panel, replace

* Leave no data in memory
clear

* end of do file

