Uwe Siebert

Real World Health Care Data Analysis


Скачать книгу

= 1 %THEN DROP = _cohort;

       RENAME = (_new_cohort = _cohort));

      SET &indata;

      %IF &_coh_tp = 2 %THEN

      %DO;

      IF &cohort = &treated THEN

      _new_cohort = 1;

      ELSE IF &cohort ^= ‘’ THEN

      _new_cohort = 0;

      %END;

      %ELSE %IF &_coh_tp = 1 %THEN

      %DO;

      IF &cohort = &treated THEN

      _new_cohort = 1;

      ELSE IF &cohort ^= . THEN

      _new_cohort = 0;

      %END;

      _mergekey = _N_;

      RUN;

      DATA _indata;

      SET _indata_keep;

      WHERE NOT MISSING(&cohort);

      RUN;

      %LET _errfound = 0;

      %GLOBAL classvars_bin interactionscont;

      %* Create binary indicator variable for all class variables and impute

       missing values if required;

      %_ps_indic(in = _indata, out = _indata_ps, full = NO);

      %LET classvars_bin_model = &classvars_bin;

      %* Run PSMATCH to create PS and derive _strata_ for step 0 - model without

       interactions *;

      PROC PSMATCH DATA = _indata_ps REGION = ALLOBS;

      CLASS _cohort &classvars_bin_model;

      PSMODEL _cohort(Treated = “1”) = &contvars &classvars_bin_model

       &always_int;

      OUTPUT OUT = ps PS = _ps_;

      RUN;

      PROC SUMMARY DATA = ps NWAY;

      CLASS _mergekey _cohort;

      VAR _ps_;

      OUTPUT OUT = ps MEAN =;

      RUN;

      %IF %SUBSTR(%UPCASE(&debug, 1, 1)) ^= Y %THEN

      OPTIONS NONOTES NOMPRINT NOMLOGIC;;

      PROC PSMATCH DATA = ps REGION = ALLOBS;

      CLASS _cohort;

      PSDATA TREATVAR = _cohort(Treated = “1”) PS = _ps_;

      STRATA NSTRATA = &nstrata KEY = TOTAL;

      OUTPUT OUT (OBS = REGION) = ps;

      RUN;

      DATA ps;

      MERGE _indata ps;

      BY _mergekey;

      RUN;

      %* Calculate standardized bias for step 0 - model without interactions;

      %_ps_stddiff_apmb (indata = ps);

      %* Calculate IMBALANCE as ABS(stddiff) > &imbal_strata_crit and count the mean

       and number of imbalanced over strata per term (main and interaction).;

      DATA _stddiff;

      SET _stddiff;

      stddiff = ABS(stddiff);

      IF stddiff > &imbal_strata_crit THEN

      imbalance = 1;

      ELSE imbalance = 0;

      IF vartype1 = ‘C’ THEN

      DO;

      _var1 = UPCASE(REVERSE(variable1));

      _var1 = REVERSE(SUBSTR(_var1, INDEX(_var1, ‘_’) + 1));

      END;

      ELSE _var1 = variable1;

      IF vartype2 = ‘C’ THEN

      DO;

      _var2 = UPCASE(REVERSE(variable2));

      _var2 = REVERSE(SUBSTR(_var2, INDEX(_var2, ‘_’) + 1));

      END;

      ELSE _var2 = variable2;

      RUN;

      PROC SORT DATA = _stddiff;

      BY _var1 _var2;

      RUN;

      PROC SUMMARY DATA = _stddiff NWAY MISSING;

      CLASS variable1 _var1 variable2 _var2;

      VAR imbalance stddiff;

      OUTPUT OUT = imbalance SUM = imbalance dum1 MEAN = dum2 stddiff;

      RUN;

      %* For interaction involving class variable the maximum number and maximum

       mean over categories is taken;

      PROC SUMMARY DATA = imbalance NWAY MISSING;

      CLASS _var1 _var2;

      VAR imbalance stddiff;

      OUTPUT OUT = imbalance (DROP = _freq_ _type_) MAX = imbalance max;

      RUN;

      %* Macro variable _N_IMBAL with number of terms (main and interaction) with

       more than &imbal_nstrata_crit imbalanced strata is created;

      PROC SQL NOPRINT;

      SELECT MEAN(max) INTO: _max FROM imbalance;

      SELECT COMPRESS(PUT(COUNT(max), BEST.)) INTO: _n_imbal FROM imbalance

       WHERE (imbalance >= &imbal_nstrata_crit);

      QUIT;

      %PUT STEP 0: #imbalanced: &_n_imbal;

      %LET count = 0;

      %* Select only the interaction terms and sort on number of imbalanced and

       mean std. bias. Select the last record. This will contain the

       interaction term to be added next;

      PROC SORT DATA = imbalance (WHERE = (_var2 ^= ‘’)) OUT = imbalance_new;

      BY imbalance max;

      RUN;

      DATA imbalance_new;

      SET imbalance_new END = last;

      IF last;

      RUN;

      %* If interaction term involves one or two class variable, get all indicator

       variables to add to model;

      PROC SORT NODUPKEY DATA = _stddiff (KEEP = _var1 variable1 _var2 variable2

       vartype:) OUT = _vars;

      BY _var1 _var2 variable1 variable2;

      RUN;

      DATA imbalance_new;

      MERGE _vars imbalance_new (IN = in);

      BY _var1 _var2;

      IF in;

      RUN;

      DATA imbalance_new;

      SET imbalance_new;

      BY _var1 _var2 variable1 variable2;

      IF vartype2 = ‘C’ AND LAST.variable1 THEN

      DELETE;

      RUN;

      PROC SORT DATA = imbalance_new;

      BY _var2 _var1 variable2 variable1;

      RUN;

      DATA