/* SAS(r) Macro for Doubly Robust Estimation v0.90 (c) 2006 The University of North Carolina at Chapel Hill. All rights reserved in accordance with license agreement accompanying the work. See Readme and accompanying documentation for important information on using the macro. Updates will be posted at http://www.harryguess.unc.edu. */ /* the double robust estimation method */ %macro foo; /* get the modeled outcome values under MODEL */ /* with each treatment group modeled separately */ /* create _wgts0_ and _wgts1_ were response is set to missing */ /* for one treatment group */ /* create data sets */ data _wgts0_; set _wgts_; %if &restype=1 %then %do; if __exp01=1 then &resvar=.; %end; %else %do; if __exp01=1 then &resvar=""; %end; run; data _wgts1_; set _wgts_; %if &restype=1 %then %do; if __exp01=0 then &resvar=.; %end; %else %do; if __exp01=0 then &resvar=""; %end; run; /* if the outcome variable distribution is NORMAL */ %if (&mdist=NORMAL or &mdist=N) %then %do; /* regression modeled values m0 */ proc glm data=_wgts0_;* noprint; model &outmod; output out=_modres0_(keep=__m0) p=__m0; run; /* regression modeled values m1 */ proc glm data=_wgts1_;* noprint; model &outmod; output out=_modres1_(keep=__m1) p=__m1; run; %end; /* if the outcome variable distrbution is BINOMIAL */ %if (&mdist=BINOMIAL or &mdist=B or &mdist=BIN) %then %do; /* regression modeled values m0 */ proc logistic data=_wgts0_ &desc;* noprint; ods exclude modelinfo; ods exclude nobs; model &outmod; output out=_modres0_(keep=__m0) p=__m0; run; /* regression modeled values m1 */ proc logistic data=_wgts1_ &desc;* noprint; ods exclude modelinfo; ods exclude nobs; model &outmod; output out=_modres1_(keep=__m1) p=__m1; run; %end; /* combine m0 and m1 */ data _modres01_(keep=&expvar &resvar __wgt __m0 __m1 __exp01 __res01); merge _wgts_ _modres0_ _modres1_; run; /* delete several data sets */ proc datasets nolist; delete _wgts0_ _wgts1_ _modres0_ _modres1_; run; quit; /* create DR0 and DR1 and their difference DR1_DR0 statistics */ data _dr01_; set _modres01_; dr0=((1-__exp01)*__res01+(__exp01-__wgt)*__m0)/(1-__wgt); dr1=(__exp01*__res01-(__exp01-__wgt)*__m1)/__wgt; dr1_dr0=dr1-dr0; run; /* obtain mean of difference DR1_DR0 (DELTA_DR) */ proc means noprint data=_dr01_; var dr1_dr0 dr0 dr1; output out=_mdr01_(drop=_type_ _freq_) mean=deltadr dr0 dr1; run; /* merge DELTA_DR (constant) with DR1_DR0 to compute L&D equation 21 */ /* i squared */ data _i2_; if _n_=1 then set _mdr01_; set _dr01_; i2=(dr1_dr0-deltadr)**2; run; /* mean of i squared and n */ proc means data=_i2_ noprint; var i2; output out=_i2_ mean= n=__n; run; /* standard error */ data _i2_; set _i2_; se=sqrt(i2/__n); run; /* combine statistics */ data _mdr01_; merge _mdr01_ _i2_; run; %mend; %macro dr(input); /* macro variable INPUT all in upper case */ %let input=%qupcase(&input); /* scan the input statement for appropriate statements */ /* OPTIONS WTMODEL MODEL ESTIMATE */ /* initialize all macro variables to missing */ %let options=; %let wtmodel=; %let expvar=; %let wtmivs=; %let model=; %let resvar=; %let modelivs=; %let usedata=; %let totalobs=; %let usedobs=; %let first=; %let last=; /* initialize the descending option indicator DESC to missing */ %let desc=; /* initialize the estimate statements list ESTIMATE to missing */ %let estimate=; /* initialize DATA to work._last_ */ %let data=WORK._LAST_; /* initialize the exposure model options */ /* initialize the exposure model method WTMMETH to dr */ %let wtmmeth=DR; /* initialize the exposere model showcurves option SHCRV to 0 */ %let shcrv=0; /* initialize the exposure model common suppport option CMSPT to 0 */ %let cmspt=0; /* initialize the exposure model common support region option CMSPTR to 100 */ %let cmsptr=100; /* intialize the exposure model number of strata option STRATA to 0 */ %let strata=0; /* initialize the exposure model combine stratum-specified estimates option COMB to invvar */ %let comb=INVVAR; /* initialize the exposure model link option WTLINK to logit */ %let wtlink=LOGIT; /* initialize the exposure model distribution option WTDIST to binomial */ %let wtdist=BINOMIAL; /* initialize the exposure model ouput data set OUTPS to missing */ %let outps=; /* initialize the model options */ /* initialize the model link opiton MLINK to identity */ %let mlink=IDENTITY; /* initialize the model distribution option MDIST to normal */ %let mdist=NORMAL; /* remove leading and trailing blanks around the = */ data _null_; temp="&input"; xx=index(temp,' ='); do while (xx); temp=substr(temp,1,(xx-1))||substr(temp,(xx+1)); xx=index(temp,' ='); end; xx=index(temp,'= '); do while (xx); temp=substr(temp,1,xx)||substr(temp,(xx+2)); xx=index(temp,'= '); end; call symput('input',temp); run; /* initialize iteration variables to 1 */ %let iv=1; %let oiv=1; /* iterate through INPUT */ %do %while (%length(%scan(&input,&iv,;))); /* select a statement STATE in INPUT */ %let state=%qscan(&input,&iv,%str(;)); /* the type of statement is determined by the first word */ /* TYPE may be OPTIONS WTMODEL MODEL or ESTIMATE */ %let type=%qscan(&state,1); /* NEXT is the location of the end of TYPE */ %let next=%eval(%index(&state,&type)+%length(&type)); /* extract the appropriate information from each statement */ /* extract the OPTIONS information */ %if %index(&type,OPTIONS) %then %do; %let options=%qsubstr(&state,&next); /* two options are available DATA= and DESCENDING */ /* iterate through the options */ %do %while (%length(%scan(&options,&oiv))); %let opt=%qscan(&options,&oiv,%str( )); /* if the option is DATA= get the dataset name */ %if %index(&opt,DATA) %then %do; %let data=%qscan(&opt,2,=); %end; /* if the option is DESCENDING set DESC to DESCENDING */ %if %index(&opt,DESCENDING) %then %do; %let desc=DESCENDING; %end; /* OIV=OIV+1 */ %let oiv=%eval(&oiv+1); /* end of %do %while */ %end; /* end TYPE is OPTIONS */ %end; /* extract the WTMODEL information */ %if %index(&type,WTMODEL) %then %do; %let wtmodel=%qsubstr(&state,&next); %end; /* extract the MODEL information */ %if %index(&type,MODEL) %then %do; %let model=%qsubstr(&state,&next); %end; /* extract the ESTIMATE information */ %if %index(&type,ESTIMATE) %then %do; %let estimate=&estimate%qsubstr(&state,&next),; %end; /* all options have been searched for */ /* IV=IV+1 */ %let iv=%eval(&iv+1); /* all of the statements are set */ %end; /* decompose the WTMODEL statement */ /* if exposure model options are selected then */ /* extract the options WTMOPTS and the model WTM */ %if %index(&wtmodel,/) %then %do; /* initialize iteration variable to 1 */ %let iv=1; /* get the exposure model */ %let wtm=%qscan(&wtmodel,1,/); /* get the exposure variable */ %let expvar=%qscan(&wtm,1,=); /* get the exposure model independent variables */ %let wtmivs=%qscan(&wtm,2,=); /* get the exposure model options */ %let wtmopts=%qscan(&wtmodel,2,/); /* iterate through WTMOPTS */ %do %while (%length(%scan(&wtmopts,&iv,%str( )))); /* select an option WTMOPT in WTMOPTS */ %let wtmopt=%qscan(&wtmopts,&iv,%str( )); /* do the appropriate action for each possible exposure model option */ /* METHOD= SHOWCURVES COMMON_SUPPORT= STRATA= COMBINE= LINK= DIST= OUTPS= */ /* extract the method information WTMMETH*/ %if %index(&wtmopt,METHOD) %then %do; %let wtmmeth=%qscan(&wtmopt,2,=); %end; /* extract the show curves information SHCRV */ %if %index(&wtmopt,SHOWCURVES) %then %do; %let shcrv=1; %end; /* extract the common support information CMSPT */ %if %index(&wtmopt,COMMON_SUPPORT) %then %do; %let cmspt=1; %let cmsptr=%qscan(&wtmopt,2,=); %if (&cmsptr>100) %then %do; %let cmspt=0; %end; %else %if (&cmsptr<=1) %then %do; data _null_; cmsptr=compress(put(round(&cmsptr*100),8.)); call symput('cmsptr',cmsptr); run; %end; %if (&cmsptr<=0) %then %do; %let cmspt=0; %end; %end; /* extract the number of strata information STRATA */ %if %index(&wtmopt,STRATA) %then %do; %let strata=%qscan(&wtmopt,2,=); %end; /* extract the combine stratum-specified estimates information COMB */ %if %index(&wtmopt,COMBINE) %then %do; %let comb=%qscan(&wtmopt,2,=); %end; /* extract the link function WTLINK */ %if %index(&wtmopt,LINK) %then %do; %let wtlink=%qscan(&wtmopt,2,=); %end; /* extract the distribution WTDIST */ %if %index(&wtmopt,DIST) %then %do; %let wtdist=%qscan(&wtmopt,2,=); %end; /* extract the name of the output file OUTPS */ %if %index(&wtmopt,OUTPS) %then %do; %let outps=%qscan(&wtmopt,2,=); %end; /* IV=IV+1 */ %let iv=%eval(&iv+1); %end; %end; /* if exposure model options are not selected then */ /* extract the exposure model WTM */ %else %do; %let wtm=&wtmodel; /* get the exposure variable */ %let expvar=%qscan(&wtm,1,=); %end; /* decompose the MODEL statement */ /* if model options are selected then */ /* extract the options MOPTS and the model OUTMOD */ %if %index(&model,/) %then %do; /* initialize iteration variable to 1 */ %let iv=1; /* get the model */ %let outmod=%qscan(&model,1,/); /* get the response variable */ %let resvar=%qscan(&outmod,1,=); /* get the independent variables */ %let modelivs=%qscan(&outmod,2,=); /* get the model options */ %let mopts=%qscan(&model,2,/); /* iterate through MOPTS */ %do %while (%length(%scan(&mopts,&iv,%str( )))); /* select an option MOPT in MOPTS */ %let mopt=%qscan(&mopts,&iv,%str( )); /* do the appropriate action for each possible model option */ /* LINK= DIST= */ /* extract the link function MLINK */ %if %index(&mopt,LINK) %then %do; %let mlink=%qscan(&mopt,2,=); %end; /* extract the distribution MDIST */ %if %index(&mopt,DIST) %then %do; %let mdist=%qscan(&mopt,2,=); %end; /* IV=IV+1 */ %let iv=%eval(&iv+1); %end; %end; /* if model options are not selected then */ /* extract the model OUTMOD */ %else %do; %let outmod=&model; /* get the response variable */ %let resvar=%qscan(&outmod,1,=); %end; /***********************/ /* initialize the data */ /***********************/ /* get the type of the treatment group variable */ proc contents data=&data noprint out=_conts_; run; data _null_; set _conts_; _foo_="&expvar"; _foo1_=compress(_foo_); if upcase(name)=_foo1_ then do; tt=put(type,8.); call symput('exptype',tt); end; run; /* get the type of response variable */ proc contents data=&data noprint out=_conts_; run; data _null_; set _conts_; _foo_="&resvar"; _foo1_=compress(_foo_); if upcase(name)=_foo1_ then do; tt=put(type,8.); call symput('restype',tt); end; run; /**************************/ /* deal with missing data */ /* use listwise deletion */ /**************************/ /* create _USEDATA_ data set that has no missing data */ data _usedata_; set &data; %if &exptype=1 %then %do; %if &restype=1 %then %do; if nmiss(of &expvar &wtmivs &resvar &modelivs)=0; %end; %else %do; if nmiss(of &expvar &wtmivs &modelivs)=0 and &resvar^=""; %end; %end; %else %do; %if &restype=1 %then %do; if nmiss(of &wtmivs &resvar &modelivs)=0 and &expvar^=""; %end; %else %do; if nmiss(of &wtmivs &modelivs)=0 and &resvar^="" and &expvar^=""; %end; %end; run; /* put a 0/1 exposure variable __EXP01 in the data set */ /* get the values of the exposure variable */ proc freq data=_usedata_ noprint; tables &expvar/out=_expname_(keep=&expvar); run; data _null_; set _expname_; if _n_=1 then do; %if &exptype=1 %then %do; expvar=put(&expvar,8.); %end; %else %do; expvar=&expvar; %end; call symput('_exp1_',expvar); end; run; data _usedata_; set _usedata_; %if &exptype=1 %then %do; __exp01=(&expvar^=&_exp1_); %end; %else %do; __exp01=(&expvar^="&_exp1_"); %end; run; /* if the reponse is binary */ /* put a 0/1 response variable __RES01 in the data set */ /* get the values of the response variable */ /* if the response is quantitative the __RES01 is the outcome */ %if ("&mdist"="B" or "&mdist"="BINOMIAL" or "&mdist"="BIN") %then %do; proc freq data=_usedata_ noprint; tables &resvar/out=_resname_(keep=&resvar); run; data _null_; set _resname_; if _n_=1 then do; %if &restype=1 %then %do; resvar=put(&resvar,8.); %end; %else %do; resvar=&resvar; %end; call symput('_res1_',resvar); end; run; data _usedata_; set _usedata_; %if &restype=1 %then %do; __res01=(&resvar^=&_res1_); %end; %else %do; __res01=(&resvar^="&_res1_"); %end; run; %end; %else %if ("&mdist"="N" or "&mdist"="NORMAL") %then %do; data _usedata_; set _usedata_; __res01=&resvar; run; %end; /* get the number of observations in the original data set */ data _null_; set &data end=__end; if __end then do; nn=put(_n_,8.); call symput('totalobs',nn); end; run; /* get the number of observations in _USEDATA_ */ data _null_; set _usedata_ end=__end; if __end then do; nn=put(_n_,8.); call symput('usedobs',nn); end; run; /************************************************************************/ /* logistic regression to get the modeled response probabilities _WGTS_ */ /************************************************************************/ proc logistic data=_usedata_ &desc;* noprint; ods exclude modelinfo; ods exclude nobs; model &wtm; output out=_wgts_ p=__wgt; run; /* get the mean of the __WGT variable */ proc means data=_wgts_; var __wgt; run; /************************************************************/ /* if SHOW_CURVES is specified then retain all observations */ /* for plotting in two data sets (treatment=0 and 1) */ /************************************************************/ %if (&shcrv=1) %then %do; data _shcrvs_; set _wgts_(keep=__wgt &expvar); run; %end; /* THIS CODE IS NO LONGER ACTIVE */ /* %if (&shcrv=1) %then %do; %do shcr_i=0 %to 1; data _shcrvs&shcr_i._; set _wgts_(keep=__wgt &expvar); if &expvar=&shcr_i; keep __wgt; run; %end; %end; */ /****************************************************/ /* if COMMON_SUPPORT is specified, reduce _wgts_ */ /* to the common support region */ /****************************************************/ %if (&cmspt=1) %then %do; /* sort by the propensity */ proc sort data=_wgts_ out=_wgts_; by __wgt; run; /* get the FIRST and LAST indicies to be retained in the data set */ /* based on the common support range, CMSPTR */ data _null_; set _wgts_(keep=__exp01) end=end; length last $8.; if _n_=1 then do; flag=__exp01; last=''; if flag=1 then templo=1; end; if flag=0 then do; if __exp01=1 then do; flag=1; templo=(_n_-1); end; end; else if __exp01=0 then temphi=(_n_+1); if end then do; if __exp01=0 then temphi=_n_; cmsptr=&cmsptr/100; alpha=(temphi-templo+1)*(1-cmsptr)/2; templo=round(templo+alpha); temphi=round(temphi-alpha); first=compress(put(templo,8.)); last=compress(put(temphi,8.)); call symput('first',first); call symput('last',last); end; retain first last flag temphi templo; run; /* retain the desired obervations */ data _wgts_; set _wgts_; /* if SHOW_CURVES is specified then retain the */ /* low and hi values of the modeled probabilites */ /* of receiving treatment */ %if &shcrv=1 %then %do; if &first=_n_ then do; call symput('lo_spt',__wgt); end; if &last=_n_ then do; call symput('hi_spt',__wgt); end; %end; if &first<=_n_<=&last; run; /* delete _SORTUSE_ */ proc datasets nolist; delete _sortuse_; run; %end; /*******************************************************************/ /* if SHOW_CURVES is specified plot the esimated densities for */ /* the treatment=0 and treatment=1 treatment probabilites. */ /* if COMMON_SUPPORT is specified include upper and lower bounds */ /*******************************************************************/ %if &shcrv=1 %then %do; proc univariate noprint data=_shcrvs_; var __wgt; class &expvar; histogram __wgt/kernel %if &cmspt=1 %then %do; href=(&lo_spt, &hi_spt) %end; ; run; %end; /* THIS CODE IS NO LONGER ACTIVE */ /* %if &shcrv=1 %then %do; /* obtain the smooths of the estimates distributions /* for response=0 ods listing close; proc kde data=_shcrvs0_; univar __wgt/out=_shcrvs0_(rename=(density=d0 value=v0)); run; /* for response=1 proc kde data=_shcrvs1_; univar __wgt/out=_shcrvs1_(rename=(density=d1 value=v1)); run; ods listing; /* if COMMON_SUPPORT is specified then obtain the maximum value /* of the density function estimates to plot boundry lines %if &cmspt %then %do; data _dens_; set _shcrvs0_ _shcrvs1_(rename=(d1=d0)); keep d0; run; proc sort data=_dens_ out=_dens_; by d0; run; data _null_; set _dens_ end=end; if end then do; call symput('maxdens',d0); end; run; %end; */ /* merge the density function estimate data sets */ /* if COMMON_SUPPORT is specified then include */ /* the boundry line information in the first */ /* two observations */ /* data _shcrvs_; merge _shcrvs0_ _shcrvs1_; %if &cmspt %then %do; if _n_=1 then do; x1=&lo_spt; y1=0; x2=&hi_spt; y2=0; end; else if _n_=2 then do; x1=&lo_spt; y1=&maxdens; x2=&hi_spt; y2=&maxdens; end; %end; run; */ /* plot */ /* proc gplot data=_shcrvs_; symbol; symbol line=1 interpol=join; plot d0*v0 d1*v1 %if &cmspt %then %do; y1*x1 y2*x2 %end; /overlay; run; quit; */ /* delete data sets */ proc datasets nolist; delete _shcrvs_ _shcrvs0_ _shcrvs1_ _dens_ _conts_ _expname_ _resname_; run; quit; /* get the double robust statistic */ %foo; /* put the total observations in the original data set into _MDR01_ */ data _mdr01_; set _mdr01_; totalobs=&totalobs; rename _freq_=usedobs; run; proc print data=_mdr01_; var totalobs usedobs dr0 dr1 deltadr se; run; /* delete temporary data sets */ proc datasets nolist; delete _dr01_ _mdr01_ _modres01_ _usedata_ _wgts_ _i2_; run; quit; /* test */ %put &input; %put; %put &data; %put &desc; %put &options; %put &wtmodel; %put &model; %put &resvar; %put &modelivs; %put &estimate; %put &wtmmeth; %put &shcrv; %put &cmspt; %put &cmsptr; %put &strata; %put &comb; %put &wtlink; %put &wtdist; %put &outps; %put &mlink; %put &mdist; %put &wtm; %put &outmod; %put &wtmodel; %put &expvar; %put &wtmivs; %put &model; %put &resvar; %put &modelivs; %put &totalobs; %put &usedobs; %mend;