*******************************************
*******************************************
* STRUCTURE
* A) WORKPLACE PREPARATION
* B) COMPUTE VARIABLES
* C) DATA TRANSFORMATION FOR REGRESSION ANALYSIS
* D) DESCRIPTIVE STATISTICS
* E) DROPOUT ANALYSIS 
* F) PSYCHOMETRICS
* G) EXPORT DATASET CREATION
*******************************************
*******************************************



******************************************
******************************************
*** A -  WORKPLACE PREPARATION ***
******************************************
******************************************



******************************************
*** Stata Preparation  ***
******************************************

* Close every log that is still open, then start a fresh log file
* (an existing log.log is overwritten)
log close _all
log using "log.log", name(log) replace

* Switch off the -more- pause and keep that setting for future sessions
set more off, permanently


** Check that the user-written add-ins used by this do-file are installed.
** The search command only lists matching packages, it does not install
** anything. Without this check a missing add-in would only be noticed
** later, at the first call of the missing command.

** Add-In mvpatterns
** Info: https://stats.idre.ucla.edu/stata/faq/how-can-i-see-the-number-of-missing-values-and-patterns-of-missing-values-in-my-data-file/
** Accessed: 15. Sep 2019

capture which mvpatterns
if _rc != 0 {
	search mvpatterns
	display as error "mvpatterns is not installed: install it from the search results above, then re-run this do-file"
	exit 199
}

** Add-In fapara
** Info: https://stats.idre.ucla.edu/stata/faq/how-to-do-parallel-analysis-for-pca-or-factor-analysis-in-stata/
** Accessed: 15. Sep 2019

capture which fapara
if _rc != 0 {
	search fapara
	display as error "fapara is not installed: install it from the search results above, then re-run this do-file"
	exit 199
}



******************************************
*** Create Study Dataset  ***
******************************************

* Raw_Data.dta is read via a relative path, so it has to be in Stata's
* current working directory (place it in the same folder as this do-file).
* Load the raw data, store a copy as Study_Data.dta and continue with that copy.
use "Raw_Data.dta", clear
save "Study_Data.dta", replace
use "Study_Data.dta", clear











******************************************
******************************************
*** B - COMPUTE VARIABLES ***
******************************************
******************************************



******************************************
*** Compute Goal Conflict Scale  ***
******************************************


** New variable gc_04
** alpha reports the reliability of the four items and stores the scale
** score (computed from the non-missing items) in gc_04

local gc04items taa12 taa26 taa38 taa47

alpha `gc04items', generate(gc_04)


** Display the missing-value patterns of the items

mvpatterns `gc04items'


** gc04mv = number of missing items per observation (0 to 4)
** float is the storage type generate uses by default

egen float gc04mv = rowmiss(`gc04items')


** Set scale value to missing, if more than one item is missing

replace gc_04 = . if gc04mv > 1


** Set all items to missing if more than one item is missing (to accurately compute reliability measures)

foreach item of varlist `gc04items' {
	replace `item' = . if gc04mv > 1
}


** New variable gc_05
** alpha reports the reliability of the four items and stores the scale
** score (computed from the non-missing items) in gc_05

local gc05items taa74_2005 taa75_2005 taa76_2005 taa77_2005

alpha `gc05items', generate(gc_05)


** Display the missing-value patterns of the items

mvpatterns `gc05items'


** gc05mv = number of missing items per observation (0 to 4)
** float is the storage type generate uses by default

egen float gc05mv = rowmiss(`gc05items')


** Set scale value to missing, if more than one item is missing

replace gc_05 = . if gc05mv > 1


** Set all items to missing if more than one item is missing (to accurately compute reliability measures)

foreach item of varlist `gc05items' {
	replace `item' = . if gc05mv > 1
}




** New variable gc_07
** alpha reports the reliability of the four items and stores the scale
** score (computed from the non-missing items) in gc_07

local gc07items taa74_2007 taa75_2007 taa76_2007 taa77_2007

alpha `gc07items', generate(gc_07)


** Display the missing-value patterns of the items

mvpatterns `gc07items'


** gc07mv = number of missing items per observation (0 to 4)
** float is the storage type generate uses by default

egen float gc07mv = rowmiss(`gc07items')


** Set scale value to missing, if more than one item is missing

replace gc_07 = . if gc07mv > 1


** Set all items to missing if more than one item is missing (to accurately compute reliability measures)

foreach item of varlist `gc07items' {
	replace `item' = . if gc07mv > 1
}




** New variable gc_14
** alpha reports the reliability of the four items and stores the scale
** score (computed from the non-missing items) in gc_14

local gc14items taa74_2014 taa75_2014 taa76_2014 taa77_2014

alpha `gc14items', generate(gc_14)


** Display the missing-value patterns of the items

mvpatterns `gc14items'


** gc14mv = number of missing items per observation (0 to 4)
** float is the storage type generate uses by default

egen float gc14mv = rowmiss(`gc14items')


** Set scale value to missing, if more than one item is missing

replace gc_14 = . if gc14mv > 1


** Set all items to missing if more than one item is missing (to accurately compute reliability measures)

foreach item of varlist `gc14items' {
	replace `item' = . if gc14mv > 1
}





******************************************
*** Compute Individual Averages and Deviations ***
*** For Goal Conflict Scale ***
******************************************

** New variable avg_gc: individual average of the goal conflict scale
** across the four waves (computed from the non-missing wave scores).
** asis keeps every wave score in its original direction. Without it,
** alpha reverses the sign of any variable that relates negatively to the
** rest, which would distort an average of repeated measures of one scale.
alpha gc_04 gc_05 gc_07 gc_14, asis generate(avg_gc)

** New variables avg_gc_04, avg_gc_05, avg_gc_07 and avg_gc_14
** (one copy of the individual average per wave)
foreach wave in 04 05 07 14 {
	generate avg_gc_`wave' = avg_gc
}

** New variables dev_gc_04, dev_gc_05, dev_gc_07 and dev_gc_14
** (deviation of each wave score from the individual average)
foreach wave in 04 05 07 14 {
	generate dev_gc_`wave' = gc_`wave' - avg_gc
}





******************************************
*** Define Job Satisfaction Variables  ***
******************************************


** New variable js_04 (copy of az1, which carries no year suffix)

generate js_04 = az1


** New variables js_05, js_07 and js_14 (copies of az1_2005, az1_2007 and az1_2014)

foreach yy in 05 07 14 {
	generate js_`yy' = az1_20`yy'
}





******************************************
*** Compute Work Engagement Scale  ***
******************************************


** New variable we_04
** alpha reports the reliability of the nine items and stores the scale
** score (computed from the non-missing items) in we_04

local we04items enga1 enga4 enga5 enga7 enga8 enga9 enga10 enga11 enga12

alpha `we04items', generate(we_04)


** Display the missing-value patterns of the items

mvpatterns `we04items'


** we04mv = number of missing items per observation (0 to 9)
** float is the storage type generate uses by default

egen float we04mv = rowmiss(`we04items')


** Set scale value to missing, if more than two items are missing

replace we_04 = . if we04mv > 2


** Set all items to missing if more than two items are missing (to accurately compute reliability measures)

foreach item of varlist `we04items' {
	replace `item' = . if we04mv > 2
}


** New variable we_05
** alpha reports the reliability of the nine items and stores the scale
** score (computed from the non-missing items) in we_05

local we05items enga1_2005 enga4_2005 enga5_2005 enga7_2005 enga8_2005 enga9_2005 enga10_2005 enga11_2005 enga12_2005

alpha `we05items', generate(we_05)


** Display the missing-value patterns of the items

mvpatterns `we05items'


** we05mv = number of missing items per observation (0 to 9)
** float is the storage type generate uses by default

egen float we05mv = rowmiss(`we05items')


** Set scale value to missing, if more than two items are missing

replace we_05 = . if we05mv > 2


** Set all items to missing if more than two items are missing (to accurately compute reliability measures)

foreach item of varlist `we05items' {
	replace `item' = . if we05mv > 2
}




** New variable we_07
** alpha reports the reliability of the nine items and stores the scale
** score (computed from the non-missing items) in we_07

local we07items enga1_2007 enga4_2007 enga5_2007 enga7_2007 enga8_2007 enga9_2007 enga10_2007 enga11_2007 enga12_2007

alpha `we07items', generate(we_07)


** Display the missing-value patterns of the items

mvpatterns `we07items'


** we07mv = number of missing items per observation (0 to 9)
** float is the storage type generate uses by default

egen float we07mv = rowmiss(`we07items')


** Set scale value to missing, if more than two items are missing

replace we_07 = . if we07mv > 2


** Set all items to missing if more than two items are missing (to accurately compute reliability measures)

foreach item of varlist `we07items' {
	replace `item' = . if we07mv > 2
}


** New variable we_14
** alpha reports the reliability of the nine items and stores the scale
** score (computed from the non-missing items) in we_14

local we14items enga1_2014 enga4_2014 enga5_2014 enga7_2014 enga8_2014 enga9_2014 enga10_2014 enga11_2014 enga12_2014

alpha `we14items', generate(we_14)


** Display the missing-value patterns of the items

mvpatterns `we14items'


** we14mv = number of missing items per observation (0 to 9)
** float is the storage type generate uses by default

egen float we14mv = rowmiss(`we14items')


** Set scale value to missing, if more than two items are missing

replace we_14 = . if we14mv > 2


** Set all items to missing if more than two items are missing (to accurately compute reliability measures)

foreach item of varlist `we14items' {
	replace `item' = . if we14mv > 2
}



******************************************
*** Compute Depressive Symptoms Scale  ***
******************************************


** Info: In the raw data set euthymia items are stored with reversed polarity


** New variable de_04
** alpha reports the reliability of the ten items and stores the scale
** score (computed from the non-missing items) in de_04
** NOTE(review): asis is not specified, so alpha reverses the sign of any
** item that relates negatively to the scale. Confirm that this is intended
** for the euthymia items (see the polarity remark in this section's header).

local de04items std1 std2 std3 std4 std5 std6 std7 std8 std9 std10

alpha `de04items', generate(de_04)


** Display the missing-value patterns of the items

mvpatterns `de04items'


** de04mv = number of missing items per observation (0 to 10)
** float is the storage type generate uses by default

egen float de04mv = rowmiss(`de04items')


** Set scale value to missing, if more than two items are missing

replace de_04 = . if de04mv > 2


** Set all items to missing if more than two items are missing (to accurately compute reliability measures)

foreach item of varlist `de04items' {
	replace `item' = . if de04mv > 2
}


** New variable de_05
** alpha reports the reliability of the ten items and stores the scale
** score (computed from the non-missing items) in de_05
** NOTE(review): asis is not specified, so alpha reverses the sign of any
** item that relates negatively to the scale. Confirm that this is intended
** for the euthymia items (see the polarity remark in this section's header).

local de05items std1_2005 std2_2005 std3_2005 std4_2005 std5_2005 std6_2005 std7_2005 std8_2005 std9_2005 std10_2005

alpha `de05items', generate(de_05)


** Display the missing-value patterns of the items

mvpatterns `de05items'


** de05mv = number of missing items per observation (0 to 10)
** float is the storage type generate uses by default

egen float de05mv = rowmiss(`de05items')


** Set scale value to missing, if more than two items are missing

replace de_05 = . if de05mv > 2


** Set all items to missing if more than two items are missing (to accurately compute reliability measures)

foreach item of varlist `de05items' {
	replace `item' = . if de05mv > 2
}


** New variable de_07
** alpha reports the reliability of the ten items and stores the scale
** score (computed from the non-missing items) in de_07
** NOTE(review): asis is not specified, so alpha reverses the sign of any
** item that relates negatively to the scale. Confirm that this is intended
** for the euthymia items (see the polarity remark in this section's header).

local de07items std1_2007 std2_2007 std3_2007 std4_2007 std5_2007 std6_2007 std7_2007 std8_2007 std9_2007 std10_2007

alpha `de07items', generate(de_07)


** Display the missing-value patterns of the items

mvpatterns `de07items'


** de07mv = number of missing items per observation (0 to 10)
** float is the storage type generate uses by default

egen float de07mv = rowmiss(`de07items')


** Set scale value to missing, if more than two items are missing

replace de_07 = . if de07mv > 2


** Set all items to missing if more than two items are missing (to accurately compute reliability measures)

foreach item of varlist `de07items' {
	replace `item' = . if de07mv > 2
}



** New variable de_14
** Scale generated by -alpha- from the ten 2014 items std1_2014 ... std10_2014

local de14_items std1_2014 std2_2014 std3_2014 std4_2014 std5_2014 std6_2014 std7_2014 std8_2014 std9_2014 std10_2014

alpha `de14_items', generate(de_14)

** Set scale value to missing, if more than two items are missing

mvpatterns `de14_items'

* de14mv = number of missing items per respondent (0-10).
* rowmiss() replaces the former chain of hand-written -replace- statements
* and yields the same count.
egen de14mv = rowmiss(`de14_items')

replace de_14 = . if de14mv > 2


** Set all items to missing if more than two items are missing (to accurately compute reliability measures)

forvalues item = 1/10 {
    replace std`item'_2014 = . if de14mv > 2
}



******************************************
*** Compute Work Experience Variables  ***
******************************************


** twe_04: pre-study tenure (i.e. total work experience as a physician at T1)
** dauer1 is kept as twe_04_years and multiplied by 12
** (presumably years -> months; confirm against the codebook)

generate twe_04_years = dauer1
generate twe_04 = 12 * twe_04_years


** twe_07: total work experience at T3

generate twe_07 = dauer_Arzt_2007


** twe_14: total work experience at T4
** T3 questionnaire was sent out Feb 2007, T4 questionnaire Jan 2014
** -> 83 months between T3 and T4

local months_t3_t4 = 83

* unterbr_2014 == 1: no break from the job between T3 and T4
generate twe_14 = twe_07 + `months_t3_t4' if unterbr_2014 == 1

* unterbr_2014 == 2: break from the job; its duration (unterbr_dauer_2014) is subtracted
replace twe_14 = twe_07 + `months_t3_t4' - unterbr_dauer_2014 if unterbr_2014 == 2


** ot_07 / ot_14: observational tenure, i.e. additional work experience
** collected between T1 and T3 (ot_07) and between T1 and T4 (ot_14)

generate ot_07 = twe_07 - twe_04
generate ot_14 = twe_14 - twe_04


******************************************
*** Compute Leadership Position Variables  ***
******************************************

** Each lp_* indicator is 1 for the listed position codes, 0 for all remaining
** codes, and missing when the position is missing or coded as "other".

** lp_04: 1 = code 4; "other" = code 5
generate lp_04 = (position == 4)
replace lp_04 = . if position == . | position == 5


** lp_05: 1 = code 4; "other" = code 5
generate lp_05 = (position_2005 == 4)
replace lp_05 = . if position_2005 == . | position_2005 == 5


** lp_07: 1 = codes 4 and 5; "other" = code 6
generate lp_07 = inlist(position_2007, 4, 5)
replace lp_07 = . if position_2007 == . | position_2007 == 6


** lp_14: 1 = codes 4 to 8; "other" = code 9
generate lp_14 = inlist(position_2014, 4, 5, 6, 7, 8)
replace lp_14 = . if position_2014 == . | position_2014 == 9




******************************************
*** Compute Specialist Training Completed Variables  ***
******************************************

** stc_04 / stc_05
** Question not asked at T1 and T2 as all participants were expected to be in
** training (according to inclusion criteria); both variables are missing for everyone
generate stc_04 = .
generate stc_05 = .


** stc_07 / stc_14
** 1 if abg_weit_* == 2 (takes precedence over the missing rules below),
** missing if the follow-up flag matches or abg_weit_* is missing, 0 otherwise.
** NOTE(review): stc_07 tests follow_up_2007 == 0 whereas stc_14 tests
** follow_up_2014 == . -- confirm that this difference is intended.
generate stc_07 = cond(abg_weit_2007 == 2, 1, cond(follow_up_2007 == 0 | abg_weit_2007 == ., ., 0))

generate stc_14 = cond(abg_weit_2014 == 2, 1, cond(follow_up_2014 == . | abg_weit_2014 == ., ., 0))





************************************************************
***************** Compute Working Part-Time Variables *********************************
************************************************************


** pt_04, pt_05, pt_07, pt_14
** 1 if the contract variable equals 2, 0 if it equals 1, missing for any other value

generate pt_04 = (vertrag == 2) if inlist(vertrag, 1, 2)

generate pt_05 = (vertrag_2005 == 2) if inlist(vertrag_2005, 1, 2)

generate pt_07 = (vertrag_2007 == 2) if inlist(vertrag_2007, 1, 2)

generate pt_14 = (vertrag_2014 == 2) if inlist(vertrag_2014, 1, 2)




************************************************************
*** Compute Gender Indicator Variable ***
************************************************************

* New variable gender, copied from geschl and recoded so that
* 0 = male (geschl code 1) and 1 = female (geschl code 2);
* any other value is left unchanged

generate gender = geschl
recode gender (1 = 0) (2 = 1)

************************************************************
*** Define Age Variables ***
************************************************************


* New variables age_04, age_05, age_07, age_14 (copies of the raw age variables)
generate age_04 = alter

foreach yy in 05 07 14 {
    generate age_`yy' = alter_20`yy'
}







************************************************************
*** Define Total Number Of Job Changes T1 - T4 ***
************************************************************

* New variable tjc (copy of arbwechs_mal_2014)
generate tjc = arbwechs_mal_2014



************************************************************
** New medical speciality variable
************************************************************

* med_sp starts at 0 and is set to k whenever weit<k> equals k (k = 1, ..., 28).
* The loop runs in ascending order, so the highest matching k wins.

generate med_sp = 0

forvalues k = 1/28 {
    replace med_sp = `k' if weit`k' == `k'
}



************************************************************
*** New workplace variables ***
************************************************************

* workplace_05 / workplace_07: copy of arbplatz_* for the codes 1, 2 and 3,
* missing for every other value
foreach yy in 05 07 {
    generate workplace_`yy' = arbplatz_20`yy' if inlist(arbplatz_20`yy', 1, 2, 3)
}

* workplace_14 is assembled from four 2014 indicators:
*   1 = arb_gw, arb_med and arb_akutmed all equal 1 and arb_stat equals 1
*   2 = arb_gw, arb_med and arb_akutmed all equal 1 and arb_stat equals 0
*   3 = at least one of arb_gw, arb_med, arb_akutmed equals 2
local all_three_yes "arb_gw_2014 == 1 & arb_med_2014 == 1 & arb_akutmed_2014 == 1"

generate workplace_14 = .
replace workplace_14 = 1 if `all_three_yes' & arb_stat_2014 == 1
replace workplace_14 = 2 if `all_three_yes' & arb_stat_2014 == 0
replace workplace_14 = 3 if arb_gw_2014 == 2 | arb_med_2014 == 2 | arb_akutmed_2014 == 2





************************************************************
*** Recode Participant Identifier ***
************************************************************

* Transform code variable (stored as string) into a new numeric variable (= id)
encode code, gen(id)





************************************************************
*** Save Changes ***
************************************************************

* Write all computed variables back to the study dataset and reload it
save "Study_Data.dta", replace

use "Study_Data.dta", clear










******************************************
******************************************
*** C - DATA TRANSFORMATION FOR REGRESSION ANALYSIS ***
******************************************
******************************************


** Info:
**
** Current data set format: 
** id_A variable_1_04 variable_1_05 variable_1_07 variable_1_14 variable_2_04  ...
** id_B variable_1_04 variable_1_05 variable_1_07 variable_1_14 variable_2_04  ...
**
** Format required for regression analysis:
**
** id_A 2004 variable_1 variable_2 ...
** id_A 2005 variable_1 variable_2 ...
** id_A 2007 variable_1 variable_2 ...
** id_A 2014 variable_1 variable_2 ...
** id_B 2004 variable_1 variable_2 ...
** ...


** Data set expansion
* Every row is duplicated twice (1 -> 2 -> 4 identical rows per id) and each
* copy is labelled with its own year indicator (2004, 2005, 2007 or 2014)

* First doubling: year is 0 for the original row and 1 for its copy
expand 2, generate(year)
replace year = 2004 + year

* Second doubling: the copy of a 2004 row becomes 2007, the copy of a 2005 row becomes 2014
expand 2, generate(dupli)
replace year = cond(year == 2004, 2007, 2014) if dupli == 1

drop dupli


* Year-specific variables: each long variable takes the value of its
* <name>_04 / _05 / _07 / _14 counterpart in the matching year.
* The placeholder 555 is overwritten in every row because all four years are covered.
*   gc     = goal conflict
*   dev_gc = goal conflict scale: deviation from individual average
*   js     = job satisfaction
*   we     = work engagement
*   de     = depressive symptoms

foreach stem in gc dev_gc js we de {
    generate `stem' = 555
    foreach yy in 04 05 07 14 {
        replace `stem' = `stem'_`yy' if year == 20`yy'
    }
}


* New variable workplace (fixed to 1 in 2004, wave-specific variable afterwards)

generate workplace = .
replace workplace = 1 if year == 2004
foreach yy in 05 07 14 {
    replace workplace = workplace_`yy' if year == 20`yy'
}


*   lp  = leadership position
*   stc = specialist training completed
*   pt  = part-time

foreach stem in lp stc pt {
    generate `stem' = 555
    foreach yy in 04 05 07 14 {
        replace `stem' = `stem'_`yy' if year == 20`yy'
    }
}


* New variable ot (observational tenure); only filled for 2007 and 2014

generate ot = .
foreach yy in 07 14 {
    replace ot = ot_`yy' if year == 20`yy'
}


* New variable age_at_data_collection (age at T1 to T4)

generate age_at_data_collection = 555
foreach yy in 04 05 07 14 {
    replace age_at_data_collection = age_`yy' if year == 20`yy'
}


** New binary year indicator variables (year_04, year_05, year_07, year_14)

foreach yy in 04 05 07 14 {
    generate year_`yy' = (year == 20`yy')
}





************************************************************
*** Save Changes As Separate Dataset ***
************************************************************

save "Regression_Data.dta", replace

* Revert to previous (non-expanded) dataset
use "Study_Data.dta", clear






******************************************
******************************************
*** D - DESCRIPTIVE STATISTICS   ***
******************************************
******************************************




******************************************
*** Respondents  ***
******************************************

* Respondents 2004 total/by gender
tabulate gender

* Respondents 2005, 2007 and 2014 total/by gender
foreach wave in 2005 2007 2014 {
    tabulate gender follow_up_`wave'
}


******************************************
*** Study Sample  ***
******************************************


*  Participants in study sample 2004
mvpatterns gc_04 js_04 we_04 de_04

* Interpretation:
* All variables available: 587
* Only job satisfaction variable missing: 1
* Only depressive symptoms scale missing: 2
* Total: 590


* By gender
* Participants with incomplete data (js_04, we_04 and de_04 all missing) are excluded

generate gender_tmp = gender if !missing(js_04) | !missing(we_04) | !missing(de_04)

tabulate gender_tmp
drop gender_tmp

*  Participants in study sample 2005
mvpatterns gc_05 js_05 we_05 de_05

* Interpretation:
* All variables available: 533
* Only job satisfaction variable missing: 1
* Only work engagement scale missing: 1
* Only depressive symptoms scale missing: 2
* Total: 537


* By gender
* Participants with incomplete data (gc_05 missing) are excluded

generate gender_tmp = gender if !missing(gc_05)

sum gender_tmp
drop gender_tmp

*  Participants in study sample 2007
mvpatterns gc_07 js_07 we_07 de_07 ot_07

* Interpretation:
* All variables available: 480
* Only work engagement scale missing: 2
* Only depressive symptoms scale missing: 2
* Only observational tenure variable missing: 3
* Total: 487


* By gender
* Participants with incomplete data (gc_07 missing) are excluded

generate gender_tmp = gender if !missing(gc_07)

sum gender_tmp
drop gender_tmp


*  Participants in study sample 2014
mvpatterns gc_14 js_14 we_14 de_14 ot_14

* Interpretation:
* All variables available: 377
* Only job satisfaction variable missing: 2
* Only depressive symptoms scale missing: 7
* Only observational tenure variable missing: 44
* Only depressive symptoms scale and observational tenure variable missing: 1
* Only depressive symptoms scale and work engagement scale missing: 1
* Only job satisfaction and observational tenure variables missing: 1
* Total: 433

* By gender
* Participants with incomplete data (gc_14 missing) are excluded

generate gender_tmp = gender if !missing(gc_14)

sum gender_tmp
drop gender_tmp




******************************************
*** Medical Specialties  ***
******************************************


* Copy of med_sp restricted to the 2004 study sample
* (participants with js_04, we_04 and de_04 all missing are excluded)
generate med_sp_tmp = med_sp if !missing(js_04) | !missing(we_04) | !missing(de_04)

* Frequencies are stored in matrix freq, the tabulated values in matrix names
tabulate med_sp_tmp, matcell(freq) matrow(names)

* Export to Excel
* NOTE(review): column C is written as freq/r(N), i.e. a proportion between 0 and 1,
* although its header reads "Percent" -- confirm this is the intended scale.

putexcel A1=("Speciality") B1=("Freq.") C1=("Percent") using Specialties_04, replace
putexcel A2=matrix(names) B2=matrix(freq) C2=matrix(freq/r(N)) using Specialties_04, modify

drop med_sp_tmp




******************************************
*** Descriptive Statistics by Year ***
******************************************


* Conditions identifying participants with incomplete data in each wave
local incomplete_04 "missing(js_04) & missing(we_04) & missing(de_04)"
local incomplete_05 "missing(gc_05)"
local incomplete_07 "missing(gc_07)"
local incomplete_14 "missing(gc_14)"

** Age 2004, 2005, 2007 and 2014
* For each wave: summarize age among participants with complete data
foreach yy in 04 05 07 14 {
    generate age_tmp = age_`yy' if !(`incomplete_`yy'')
    sum age_tmp
    drop age_tmp
}





* Conditions identifying participants with incomplete data in each wave
local incomplete_04 "missing(js_04) & missing(we_04) & missing(de_04)"
local incomplete_05 "missing(gc_05)"
local incomplete_07 "missing(gc_07)"
local incomplete_14 "missing(gc_14)"


** Goal Conflict 2004 (participants with incomplete data excluded)
generate gc_tmp = gc_04 if !(`incomplete_04')
sum gc_tmp
drop gc_tmp

** Goal Conflict 2005, 2007 and 2014
foreach yy in 05 07 14 {
    sum gc_`yy'
}





** Average Goal Conflict 2004, 2005, 2007 and 2014
* For each wave: summarize among participants with complete data
foreach yy in 04 05 07 14 {
    generate avg_gc_tmp = avg_gc_`yy' if !(`incomplete_`yy'')
    sum avg_gc_tmp
    drop avg_gc_tmp
}






** Goal Conflict Deviation From Individual Average 2004
** (participants with incomplete data excluded)
generate dev_gc_tmp = dev_gc_04 if !(`incomplete_04')
sum dev_gc_tmp
drop dev_gc_tmp

** Goal Conflict Deviation From Individual Average 2005, 2007 and 2014
foreach yy in 05 07 14 {
    sum dev_gc_`yy'
}





** Job Satisfaction (js), Work Engagement (we) and Depressive Symptoms (de)
** 2004: summarized directly
** 2005, 2007, 2014: participants with incomplete data (gc of that wave missing) are excluded

foreach scale in js we de {

    sum `scale'_04

    foreach yy in 05 07 14 {
        generate `scale'_tmp = `scale'_`yy' if !missing(gc_`yy')
        sum `scale'_tmp
        drop `scale'_tmp
    }
}







* Conditions identifying participants with incomplete data in each wave
local incomplete_04 "missing(js_04) & missing(we_04) & missing(de_04)"
local incomplete_05 "missing(gc_05)"
local incomplete_07 "missing(gc_07)"
local incomplete_14 "missing(gc_14)"


** Observational tenure 2007 and 2014 (participants with incomplete data excluded)
foreach yy in 07 14 {
    generate ot_tmp = ot_`yy' if !(`incomplete_`yy'')
    sum ot_tmp
    drop ot_tmp
}




** Pre-study tenure (twe_04) within the 2004, 2005, 2007 and 2014 samples
** (participants with incomplete data in the respective wave excluded)
foreach yy in 04 05 07 14 {
    generate twe_04_tmp = twe_04 if !(`incomplete_`yy'')
    sum twe_04_tmp
    drop twe_04_tmp
}






** Workplace 2005, 2007 and 2014
** Participants with incomplete data (gc of that wave missing) are excluded
foreach yy in 05 07 14 {
    generate workplace_tmp = workplace_`yy' if !missing(gc_`yy')
    tab workplace_tmp
    drop workplace_tmp
}






** Leadership Position 2004, 2005, 2007 and 2014
**
** For each wave the participants with incomplete data are removed from the
** dataset in memory, the wave-specific commands are run on lp_tmp, and the
** study dataset is reloaded afterwards.

* Observations kept per wave (= participants with complete data)
local keep_04 "!missing(js_04) | !missing(we_04) | !missing(de_04)"
local keep_05 "!missing(gc_05)"
local keep_07 "!missing(gc_07)"
local keep_14 "!missing(gc_14)"

* Commands applied to lp_tmp per wave, in this order
* (mvpatterns lists the missing values)
local cmds_04 "mvpatterns tab"
local cmds_05 "tab mvpatterns"
local cmds_07 "tab sum mvpatterns"
local cmds_14 "sum tab mvpatterns"

foreach yy in 04 05 07 14 {

    generate lp_tmp = lp_`yy'

    * Exclude participants with incomplete data
    keep if `keep_`yy''

    foreach cmd of local cmds_`yy' {
        `cmd' lp_tmp
    }

    drop lp_tmp

    * Reload the study dataset (observations were dropped above)
    use Study_Data.dta, clear
}






** Specialist training completed 2007 and 2014
** Participants with incomplete data (gc of that wave missing) are excluded
foreach yy in 07 14 {
    generate stc_tmp = stc_`yy' if !missing(gc_`yy')
    sum stc_tmp
    drop stc_tmp
}






* Conditions identifying participants with incomplete data in each wave
local incomplete_04 "missing(js_04) & missing(we_04) & missing(de_04)"
local incomplete_05 "missing(gc_05)"
local incomplete_07 "missing(gc_07)"
local incomplete_14 "missing(gc_14)"


** Working part-time 2004, 2005, 2007 and 2014
** The wave-specific copies pt_tmp_* (participants with incomplete data
** excluded) are kept until the overall indicator below has been built
foreach yy in 04 05 07 14 {
    generate pt_tmp_`yy' = pt_`yy' if !(`incomplete_`yy'')
    sum pt_tmp_`yy'
}

** Overall average
** worked_pt_once = 1 if part-time in at least one wave,
**                = 0 if never part-time but 0 in at least one wave,
**                = missing if no wave provides a value

generate gender_tmp = gender

generate worked_pt_once = .

* All 0s are assigned first and all 1s afterwards, so that a single 1 overrides any 0
foreach status in 0 1 {
    foreach yy in 04 05 07 14 {
        replace worked_pt_once = `status' if pt_tmp_`yy' == `status'
    }
}


tabulate worked_pt_once
tabulate worked_pt_once gender_tmp



drop pt_tmp_04 pt_tmp_05 pt_tmp_07 pt_tmp_14 worked_pt_once gender_tmp


* Reload the study dataset
use Study_Data.dta, clear





* Conditions identifying participants with incomplete data in each wave
local incomplete_04 "missing(js_04) & missing(we_04) & missing(de_04)"
local incomplete_05 "missing(gc_05)"
local incomplete_07 "missing(gc_07)"
local incomplete_14 "missing(gc_14)"


** Total number of job changes (tjc) within the 2004, 2005, 2007 and 2014 samples
** (participants with incomplete data in the respective wave excluded)
foreach yy in 04 05 07 14 {
    generate tjc_tmp = tjc if !(`incomplete_`yy'')
    sum tjc_tmp
    drop tjc_tmp
}




******************************************
*** Descriptive Statistics Totals ***
******************************************

* Load regression dataset (long format, saved as Regression_Data.dta in section C).
* -use- has no -replace- option; -clear- is the option that discards the
* dataset currently in memory before loading.
use Regression_Data.dta, clear



** Pooled descriptive statistics over all waves (long-format dataset)
**
** For every variable a temporary copy <var>_tmp is created, the rows of
** participants with incomplete data in the respective wave are set to
** missing, the copy is summarized (tabulated for workplace) and dropped.
**
** Variables, in order of output:
**   age_at_data_collection, gender,
**   gc (goal conflict), dev_gc (deviation from individual average),
**   avg_gc (individual average), js (job satisfaction), we (work engagement),
**   de (depressive symptoms), ot (observational tenure),
**   twe_04 (pre-study tenure), workplace, lp (leadership position),
**   stc (specialist training completed), pt (working part-time),
**   tjc (total number of job changes)

* Conditions identifying participants with incomplete data in each wave
local incomplete_2004 "missing(js_04) & missing(we_04) & missing(de_04)"
local incomplete_2005 "missing(gc_05)"
local incomplete_2007 "missing(gc_07)"
local incomplete_2014 "missing(gc_14)"

local total_vars age_at_data_collection gender gc dev_gc avg_gc js we de ot twe_04 workplace lp stc pt tjc

foreach v of local total_vars {

    * Waves for which the exclusion is applied:
    * all four by default, 2004 only for gc and dev_gc,
    * 2007 and 2014 only for ot and stc
    local waves "2004 2005 2007 2014"
    if inlist("`v'", "gc", "dev_gc") local waves "2004"
    if inlist("`v'", "ot", "stc") local waves "2007 2014"

    * workplace is tabulated, every other variable is summarized
    local report "sum"
    if "`v'" == "workplace" local report "tab"

    generate `v'_tmp = `v'

    * Exclude participants with incomplete data
    foreach wave of local waves {
        replace `v'_tmp = . if year == `wave' & `incomplete_`wave''
    }

    `report' `v'_tmp
    drop `v'_tmp
}


* Revert to previous (non-expanded) dataset
use Study_Data.dta, clear


******************************************
******************************************
*** E -  DROPOUT ANALYSIS  ***
******************************************




**** Gender comparisons at Baseline ****
** Pool - Respondents - Study Sample **

* Create an artificial dataset with 1000 observations

clear
set obs 1000


* Replicate gender distribution in initial pool, female = 1
* (ids 1-518 are coded 0, ids 519-1000 are coded 1)
generate gender = (_n >= 519)
generate id = _n



* Set grouping variable, 1 = Pool - did not respond, 2 = respondents with incomplete data, 3 = study sample
*   ids   1-285 and 696-1000 -> 3
*   ids 286-303 and 683-695  -> 2
*   ids 304-682              -> 1

generate group = 1
replace group = 3 if !inrange(id, 286, 695)
replace group = 2 if inrange(id, 286, 303) | inrange(id, 683, 695)



* Store as temporary dataset

save Temp_Data.dta, replace


**** Pairwise chi-squared tests; each test starts from the stored dataset
**** and leaves out one group:
****   group 2 left out: Did not respond - Study Sample
****   group 1 left out: Incomplete response - Study Sample
****   group 3 left out: Did not respond - Incomplete response

foreach left_out in 2 1 3 {
    use Temp_Data.dta, clear
    keep if group != `left_out'
    tabulate gender group, chi
}

use Temp_Data.dta, clear

**** Chi-squared Did not respond or incomplete response & study sample
* Groups 1 and 2 are merged before the test

replace group = 1 if group == 2

tabulate gender group, chi








******************************************

* Revert to previous (non-expanded) dataset
use Study_Data.dta, clear


* Create separate temporary dataset
save Temp_Data.dta, replace


* Missing-value patterns of the four scale scores, one wave at a time
foreach wave in 04 05 07 14 {
    mvpatterns gc_`wave' js_`wave' we_`wave' de_`wave'
}

* Results:
*   2004: no missing values for gc_04;
*         31 respondents with no data on js_04, we_04 and de_04
*   2005: 84 respondents with no data on gc_05
*   2007: 134 respondents with no data on gc_07
*   2014: 188 respondents with no data on gc_14


* Dropout analysis grouping variable (dropout_ana_group)
*   3 = excluded because not part of study sample (i.e. no data on outcomes provided at T1);
*       these observations are dropped before the variable is coded, so the value never occurs
*   2 = participant dropped out (i.e. data on T1 but missing for T2 and/or T3 and/or T4)
*   1 = participant did not drop out (i.e. data on T1 - T4)

* Drop observations that are not part of the study sample
drop if missing(js_04) & missing(we_04) & missing(de_04)

* A missing goal conflict score at any follow-up wave marks a dropout
generate dropout_ana_group = cond(missing(gc_05) | missing(gc_07) | missing(gc_14), 2, 1)


***
* Participants per group
***

tabulate dropout_ana_group


****
* Group comparisons (t-tests/chi-squared tests)
****

* T-test for age at T1
ttest age_04, by(dropout_ana_group)

* Chi-squared test for gender
tabulate gender dropout_ana_group, chi2

* T-tests for goal conflict, job satisfaction, work engagement and
* depressive symptoms at T1, and for pre-study tenure
foreach outcome in gc_04 js_04 we_04 de_04 twe_04 {
    ttest `outcome', by(dropout_ana_group)
}

* Chi-squared tests for leadership position and working part time at T1
foreach indicator in lp_04 pt_04 {
    tabulate `indicator' dropout_ana_group, chi2
}


* Revert to previous (non-expanded) dataset
use Study_Data.dta, clear





******************************************
******************************************
*** F - PSYCHOMETRICS ***
******************************************
******************************************

* Snapshot of the data in memory; it is reloaded before every follow-up
* wave because each wave drops a different set of observations.
save Temp_Data.dta, replace


************************************************************
*** Cronbach's Alpha ***
************************************************************

**** Baseline ****
* Keep only participants with at least one baseline outcome (js_04, we_04, de_04)
drop if missing(js_04) & missing(we_04) & missing(de_04)

* Goal conflict (once with item statistics, once with detailed output)
local gc_items taa12 taa26 taa38 taa47
alpha `gc_items', item
alpha `gc_items', detail

* Work engagement
local we_items
foreach i of numlist 1 4 5 7/12 {
    local we_items `we_items' enga`i'
}
alpha `we_items'

* Depressive symptoms
local de_items
forvalues i = 1/10 {
    local de_items `de_items' std`i'
}
alpha `de_items'


**** Follow-ups 1-3 (2005, 2007, 2014) ****
* Item names carry the four-digit year as suffix, scale scores the two-digit wave.
foreach wave in 05 07 14 {
    local yr 20`wave'

    * Start from the full snapshot and keep participants with a goal
    * conflict score at this wave
    use Temp_Data.dta, clear
    drop if missing(gc_`wave')

    * Goal conflict (once with item statistics, once with detailed output)
    local gc_items taa74_`yr' taa75_`yr' taa76_`yr' taa77_`yr'
    alpha `gc_items', item
    alpha `gc_items', detail

    * Work engagement
    local we_items
    foreach i of numlist 1 4 5 7/12 {
        local we_items `we_items' enga`i'_`yr'
    }
    alpha `we_items'

    * Depressive symptoms
    local de_items
    forvalues i = 1/10 {
        local de_items `de_items' std`i'_`yr'
    }
    alpha `de_items'
}

* Restore the full snapshot for the analyses that follow
use Temp_Data.dta, clear



************************************************************
*** Factor Analyses ***
************************************************************

* For every wave and scale: factor analysis, then -fapara- with 1000
* replications, then export of the current graph as <scale>_<wave>.png.
* NOTE(review): no seed is set in this section; if -fapara- relies on random
* draws, confirm a seed is set earlier so the results can be reproduced.

**** Baseline ****
* Keep only participants with at least one baseline outcome (js_04, we_04, de_04)
drop if missing(js_04) & missing(we_04) & missing(de_04)

* Item lists: goal conflict (gc), work engagement (we), depressive symptoms (de)
local gc_items taa12 taa26 taa38 taa47

local we_items
foreach i of numlist 1 4 5 7/12 {
    local we_items `we_items' enga`i'
}

local de_items
forvalues i = 1/10 {
    local de_items `de_items' std`i'
}

foreach scale in gc we de {
    factor ``scale'_items'
    fapara, reps(1000) title("2004")
    graph export `scale'_04.png, width(2000) replace
}


**** Follow-ups 1-3 (2005, 2007, 2014) ****
* Item names carry the four-digit year as suffix, scale scores the two-digit wave.
foreach wave in 05 07 14 {
    local yr 20`wave'

    * Start from the full snapshot and keep participants with a goal
    * conflict score at this wave
    use Temp_Data.dta, clear
    drop if missing(gc_`wave')

    * Item lists: goal conflict (gc), work engagement (we), depressive symptoms (de)
    local gc_items taa74_`yr' taa75_`yr' taa76_`yr' taa77_`yr'

    local we_items
    foreach i of numlist 1 4 5 7/12 {
        local we_items `we_items' enga`i'_`yr'
    }

    local de_items
    forvalues i = 1/10 {
        local de_items `de_items' std`i'_`yr'
    }

    foreach scale in gc we de {
        factor ``scale'_items'
        fapara, reps(1000) title("`yr'")
        graph export `scale'_`wave'.png, width(2000) replace
    }
}

* Revert to previous (non-expanded) dataset
use Study_Data.dta, clear



******************************************
******************************************
*** G -  EXPORT DATASET CREATION ***
******************************************
******************************************


* Load regression dataset
* -use- has no -replace- option (that belongs to -save-); -clear- is the
* option that discards the data currently in memory before loading.
use Regression_Data.dta, clear


* Keep only the variables that go into the export file
keep id gc dev_gc avg_gc js we de age_04 gender twe_04 ot stc lp pt tjc year year_04 year_05 year_07 year_14 


* Write the export file, overwriting any existing copy
save Export_Data.dta, replace







