*************


* ---- Session setup ----
clear all
set more off

* Root folders for the input data and for the output files
* (both are empty strings in this copy of the script)
global data ""
global results ""

* Load the unit-record file, then make the results folder the working directory
use "$data\Unit data (not for release)\MI Unit Record Data.dta", clear
cd "$results"

* Register the sampling weight so that svy-prefixed commands use it
svyset [pw=weight]

* Calendar quarter (1-4) from the month number; any month value other than
* the integers 1..12 leaves qr missing
gen qr = ceil(month/3) if inlist(month, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)




* Responses that are exactly zero
gen exp_zero = exp if exp == 0

* Responses whose absolute value is 5, 15, ..., 95 (multiples of 5 that are not multiples of 10)
gen exp_uncertain5 = exp if inlist(abs(exp), 5, 15, 25, 35, 45, 55, 65, 75, 85, 95)

* Responses whose absolute value is 10, 20, ..., 100 (non-zero multiples of 10)
gen exp_uncertain10 = exp if inlist(abs(exp), 10, 20, 30, 40, 50, 60, 70, 80, 90, 100)

* Version 1: zeros are counted with the round-number responses.
* At most one of the three inputs is non-missing on any row, so rowmax just picks it up.
egen exp_uncertain1 = rowmax(exp_zero exp_uncertain5 exp_uncertain10)
gen exp_certain1 = exp if missing(exp_uncertain1)

* Version 2: zeros are NOT counted as round numbers, so they stay in exp_certain.
egen exp_uncertain = rowmax(exp_uncertain5 exp_uncertain10)
gen exp_certain = exp if missing(exp_uncertain)

* Series used in the exports below: responses that are not multiples of 5, plus zeros
gen InfExp = exp_certain

* Keep only responses within [-50, 50]; rows with missing exp are removed as well
keep if inrange(exp, -50, 50)


* Weighted monthly mean of InfExp, written to its own workbook.
* Stata variable names are case-sensitive: the weight variable is `weight`
* (as used in svyset and in every other collapse in this file), not `Weight`.
* sheet(..., replace) lets the script be re-run when the workbook already exists.
preserve
collapse (mean) InfExp [aw=weight], by(year month)
export excel using "$results\exp_Aug2024_update.xls", sheet("mean", replace) firstrow(variables)
restore



**** Stats for round-number responses only
* Unweighted monthly means of the round-number series
* (exp_uncertain excludes zeros, exp_uncertain1 includes them).
* sheet(..., replace) avoids a "file already exists" error when the script is re-run.
preserve
collapse (mean) exp_uncertain exp_uncertain1, by(year month)
export excel using "$results\rounds_only.xls", sheet("unweighted_mean", replace) firstrow(variables)
restore


* 0/1 indicators for three age bands, built from the coded age variable
* (codes 1-2, 3-5 and 6-8; any other value, including missing, gets 0 in all three)
gen age55plus = inlist(age, 6, 7, 8)
gen age35_54  = inlist(age, 3, 4, 5)
gen age18_34  = inlist(age, 1, 2)

* String label for the age band; "." marks rows that fall in none of the bands
gen age_group = "."
replace age_group = "18_34"   if age18_34 == 1
replace age_group = "35_54"   if age35_54 == 1
replace age_group = "55_plus" if age55plus == 1

* Weighted monthly means by age band.
* Unclassified rows are dropped first, as the income and housing exports do,
* so that no "." group is written to the sheet.
preserve
drop if age_group == "."
collapse (mean) exp InfExp [aw=weight], by(year month age_group)
export excel using "$results\all_data_with_zeros_2024_weighted.xls", sheet("age_group", replace) firstrow(variables)
restore



* 0/1 indicators for three household-income bands (hincome codes 1-3, 4-9, 10-15)
gen lowinc  = inlist(hincome, 1, 2, 3)
gen midinc  = inlist(hincome, 4, 5, 6, 7, 8, 9)
gen highinc = inlist(hincome, 10, 11, 12, 13, 14, 15)

sort year month

* String label for the income band; "." marks rows outside codes 1-15.
* The three indicators are mutually exclusive, so the order of the tests is immaterial.
gen incomeg = cond(lowinc == 1, "low", cond(midinc == 1, "mid", cond(highinc == 1, "high", ".")))

* Weighted monthly means by income band, unclassified rows excluded
preserve
keep if incomeg != "."
collapse (mean) exp InfExp [aw=weight], by(year month incomeg)
export excel using "$results\all_data_with_zeros_2024_weighted.xls", sheet("income", replace) firstrow(variables)
restore



* String label for housing tenure from the homeown code
* (1 -> renters, 2 -> mortgagers, 3 -> owners); "." marks any other value
gen housing = cond(homeown == 1, "renters", cond(homeown == 2, "mortgagers", cond(homeown == 3, "owners", ".")))

* Weighted monthly means by tenure, unclassified rows excluded
preserve
keep if housing != "."
collapse (mean) exp InfExp [aw=weight], by(year month housing)
export excel using "$results\all_data_with_zeros_2024_weighted.xls", sheet("housing", replace) firstrow(variables)
restore




******************************************
*		30% TRIMMED MEAN AND DISTRIBUTION
******************************************
* Period identifier: one distinct value per (year, month) pair, e.g. 202408.
* Built arithmetically rather than by a tostring / concatenate / destring round
* trip: if tostring declines to convert a variable, "year+month" silently
* becomes numeric addition and different periods collide. The arithmetic form
* also sorts chronologically. Assumes year and month are numeric.
gen long IDN = year*100 + month


sort IDN
* Weighted empirical cumulative distribution of exp within each period
by IDN: cumul exp [aweight=weight], generate(cumfexp)
* Re-sort so rows are in CDF order within each period; otherwise breaks in
* cumfexp (where weights are zero) ruin the differences taken below
sort IDN cumfexp

*drop normweight
* Normalised weight of each row = its increment in the cumulative distribution
by IDN: gen normweight = cumfexp if _n == 1
by IDN: replace normweight = cumfexp[_n] - cumfexp[_n-1] if _n != 1

* Keep values and weights for rows whose cumulative share lies in [0.15, 0.85]
by IDN: gen trimexp = .
by IDN: replace trimexp = exp if cumfexp >= 0.15 & cumfexp <= 0.85
by IDN: gen trimweight = .
by IDN: replace trimweight = normweight if cumfexp >= 0.15 & cumfexp <= 0.85
* High border: the first row past 0.85 keeps only the part of its weight below 0.85,
* and its value is then restored so that it takes part in the trimmed mean
by IDN: replace trimweight = 0.85 - cumfexp[_n-1] if trimexp[_n] == . & trimexp[_n-1] != .
by IDN: replace trimexp = exp if trimexp[_n] == . & trimweight[_n] != .
* Low border: the first retained row keeps only the part of its weight above 0.15
by IDN: replace trimweight = cumfexp - 0.15 if trimexp[_n] != . & trimexp[_n-1] == .
* Diagnostic: total retained weight per period (should be 0.7)
egen trimcheck = sum(trimweight), by(IDN)
* Rescale the retained weights so that they sum to 1
by IDN: replace trimweight = trimweight/0.7



* Same 30% trimming as applied to exp, now applied to InfExp (copied into expn).
* The by-prefix below requires the data to be sorted by IDN; the most recent
* sort before this point is "sort IDN cumfexp".
gen expn=InfExp

* Weighted empirical cumulative distribution of expn within each period
by IDN: cumul expn [aweight=weight], generate(cumfexpn)	
* Re-sort so rows are in CDF order within each period; otherwise breaks in
* cumfexpn (where weights are zero) ruin the differences taken below
sort IDN cumfexpn 

*drop normweight
* Normalised weight of each row = its increment in the cumulative distribution
by IDN: gen normweightn = cumfexpn if _n == 1
by IDN: replace normweightn = cumfexpn[_n] - cumfexpn[_n-1] if _n != 1

* Keep values and weights for rows whose cumulative share lies in [0.15, 0.85]
by IDN: gen trimexpn = .
by IDN: replace trimexpn = expn if cumfexpn >= 0.15 & cumfexpn <= 0.85
by IDN: gen trimweightn = .
by IDN: replace trimweightn = normweightn if cumfexpn >= 0.15 & cumfexpn <= 0.85
* High border: the first row past 0.85 keeps only the part of its weight below 0.85,
* and its value is then restored; low border: the first retained row keeps only
* the part of its weight above 0.15
by IDN: replace trimweightn = 0.85 - cumfexpn[_n-1] if trimexpn[_n] == . & trimexpn[_n-1] != .
by IDN: replace trimexpn = expn if trimexpn[_n] == . & trimweightn[_n] != .
by IDN: replace trimweightn = cumfexpn - 0.15 if trimexpn[_n] != . & trimexpn[_n-1] == .
* Diagnostic: total retained weight per period (should be 0.7)
egen trimcheckn = sum(trimweightn), by(IDN)
* Rescale the retained weights so that they sum to 1
by IDN: replace trimweightn = trimweightn/0.7







* Monthly means of the trimmed series, added as a sheet to the main workbook.
* The workbook already exists at this point (the age_group / income / housing
* sheets were written to it above), so the sheet must be written with
* sheet(..., replace); without it export excel stops with "file already exists".
* NOTE(review): the means are weighted by the raw survey weight; the
* border-adjusted trimweight / trimweightn variables are not used here.
* Confirm that this is intended.
preserve
collapse (mean) trimexp trimexpn [aw=weight], by(year month)
export excel using "$results\all_data_with_zeros_2024_weighted.xls", sheet("trimmean", replace) firstrow(variables)
restore


* Same 30% trimming procedure, applied to wexp.
* NOTE(review): wexp is not created anywhere in the visible part of this script;
* presumably it already exists in the dataset - confirm.
sort IDN
* Weighted empirical cumulative distribution of wexp within each period
by IDN: cumul wexp [aweight=weight], generate(cumfwexp)	
* Re-sort so rows are in CDF order within each period; otherwise breaks in
* cumfwexp (where weights are zero) ruin the differences taken below
sort IDN cumfwexp 

*drop normweight
* Normalised weight of each row = its increment in the cumulative distribution
by IDN: gen normweightw = cumfwexp if _n == 1
by IDN: replace normweightw = cumfwexp[_n] - cumfwexp[_n-1] if _n != 1

* Keep values and weights for rows whose cumulative share lies in [0.15, 0.85]
by IDN: gen trimwexp = .
by IDN: replace trimwexp = wexp if cumfwexp >= 0.15 & cumfwexp <= 0.85
by IDN: gen trimweightw = .
by IDN: replace trimweightw = normweightw if cumfwexp >= 0.15 & cumfwexp <= 0.85
* High border: the first row past 0.85 keeps only the part of its weight below 0.85,
* and its value is then restored; low border: the first retained row keeps only
* the part of its weight above 0.15
by IDN: replace trimweightw = 0.85 - cumfwexp[_n-1] if trimwexp[_n] == . & trimwexp[_n-1] != .
by IDN: replace trimwexp = wexp if trimwexp[_n] == . & trimweightw[_n] != .
by IDN: replace trimweightw = cumfwexp - 0.15 if trimwexp[_n] != . & trimwexp[_n-1] == .
* Diagnostic: total retained weight per period (should be 0.7)
egen trimcheckw = sum(trimweightw), by(IDN)
* Rescale the retained weights so that they sum to 1
by IDN: replace trimweightw = trimweightw/0.7

