SET imbalance_new;
BY _var2 _var1 variable2 variable1;
IF vartype1 = ‘C’ AND LAST.variable2 THEN
DELETE;
RUN;
%* Sort IMBALANCE_NEW in place by _var1, variable1, _var2 and variable2;
PROC SORT DATA = imbalance_new OUT = imbalance_new;
    BY _var1 variable1 _var2 variable2;
RUN;
%* Dataset IMBALANCE is to contain all interaction terms and whether they are
   in the model.
   Only rows of IMBALANCE with a non-blank _var2 are kept. They are matched
   by _var1 and _var2 against the first observation of IMBALANCE_NEW.
   ITER, OUT and IN are reset to 0 on every row, and IN becomes 1 on the rows
   that receive a contribution from IMBALANCE_NEW;
DATA imbalance;
    MERGE imbalance     (WHERE = (_var2 ^= ''))
          imbalance_new (KEEP = _var1 _var2 IN = in0 OBS = 1);
    BY _var1 _var2;
    iter = 0;
    out  = 0;
    in   = 0;
    /* in0 is the IN= flag of IMBALANCE_NEW for the current BY group */
    IF in0 THEN
        in = 1;
RUN;
%* Dataset ALLINTER holds the interaction terms to be fitted, that is the
   terms already in the model plus the one to be added. Every row read from
   IMBALANCE_NEW gets ITER set to the current value of COUNT plus one;
DATA allinter;
    SET imbalance_new (IN = in0);
    IF in0 THEN
        DO;
            iter = &count + 1;
        END;
RUN;
%* Starting values for the iteration that follows. NEW_N_INTER and
   _N_IMBAL_NEW are tested in the loop condition. _N_IMBAL_START and
   _N_IMBAL_NEW both begin at the current value of _N_IMBAL;
%LET new_n_inter    = 1;
%LET n_inter        = 0;
%LET _n_imbal_start = &_n_imbal;
%LET _n_imbal_new   = &_n_imbal;
%* Add interaction terms to model and recalculate PS, _strata and
standardized bias until no more interaction terms have standardized
bias of more than &imbal_strata_crit and are not already in the model;
%DO %WHILE (&new_n_inter > 0 AND &count < &maxiter AND &_n_imbal_new ^= 0);
%LET count = %EVAL(&count + 1);
%LET n_inter = &new_n_inter;
%* Fill INTERACTIONSIN with all interactions to be fitted to the model
   in this step. Each row of ALLINTER yields one term variable1*variable2
   (blanks removed), stored in macro variable _IBINT followed by the row
   number, and _NIBINT receives the number of rows;
%* Reset the counter first so that an empty ALLINTER gives an empty
   INTERACTIONSIN instead of reusing a count left over from an earlier
   iteration;
%LET _nibint = 0;
DATA _NULL_;
    SET allinter END = last;
    CALL SYMPUT('_ibint' || COMPRESS(PUT(_n_, BEST.)),
                COMPRESS(variable1 || '*' || variable2));
    IF last THEN
        CALL SYMPUT('_nibint', COMPRESS(PUT(_n_, BEST.)));
RUN;
%* Concatenate the stored terms into one blank-separated list;
%LET interactionsin =;
%DO iloop = 1 %TO &_nibint;
    %LET interactionsin = &interactionsin &&_ibint&iloop;
%END;
%* Run PSMATCH to create PS and derive _strata_.
   The propensity score model for _cohort (treated level 1) is fitted on
   _INDATA_PS using the terms in &contvars, &classvars_bin_model,
   &always_int and &interactionsin. The estimated score is written to
   dataset PS as _ps_;
PROC PSMATCH DATA = _indata_ps REGION = ALLOBS;
    CLASS _cohort &classvars_bin_model;
    PSMODEL _cohort(Treated = "1") = &contvars &classvars_bin_model
                                     &always_int &interactionsin;
    OUTPUT OUT = ps PS = _ps_;
RUN;
%* Collapse PS to one row per combination of _mergekey and _cohort, keeping
   the mean of _ps_ under the same variable name;
PROC SUMMARY DATA = ps NWAY;
    CLASS _mergekey _cohort;
    VAR _ps_;
    OUTPUT OUT = ps MEAN =;
RUN;
%* Stratify on the already computed score. PSMATCH reads _ps_ from PS as the
   propensity score for _cohort (treated level 1), forms &nstrata strata with
   KEY = TOTAL and writes the observations in the region back to PS;
PROC PSMATCH DATA = ps REGION = ALLOBS;
    CLASS _cohort;
    PSDATA TREATVAR = _cohort(Treated = "1") PS = _ps_;
    STRATA NSTRATA = &nstrata KEY = TOTAL;
    OUTPUT OUT (OBS = REGION) = ps;
RUN;
%* Merge the PSMATCH output back onto _INDATA by _mergekey;
DATA ps;
    MERGE _indata ps;
    BY _mergekey;
RUN;
%* Calculate standardized bias.
   NOTE(review): _ps_stddiff_apmb is defined outside this section and is
   presumably what produces dataset _STDDIFF - confirm;
%_ps_stddiff_apmb (indata = ps);
%* Calculate IMBALANCE as ABS(stddiff) > &imbal_strata_crit and count
   the number of imbalanced over strata per interaction.
   STDDIFF is replaced by its absolute value. _var1 and _var2 are the names
   used to group rows. When the vartype is C the name is upper-cased and
   everything from the last underscore onwards is dropped. Otherwise the
   name is copied unchanged;
DATA _stddiff;
    SET _stddiff;
    stddiff = ABS(stddiff);
    IF stddiff > &imbal_strata_crit THEN
        imbalance = 1;
    ELSE imbalance = 0;
    IF vartype1 = 'C' THEN
        DO;
            /* Reversing puts the last underscore first, so INDEX finds it */
            _var1 = UPCASE(REVERSE(variable1));
            _var1 = REVERSE(SUBSTR(_var1, INDEX(_var1, '_') + 1));
        END;
    ELSE _var1 = variable1;
    IF vartype2 = 'C' THEN
        DO;
            _var2 = UPCASE(REVERSE(variable2));
            _var2 = REVERSE(SUBSTR(_var2, INDEX(_var2, '_') + 1));
        END;
    ELSE _var2 = variable2;
RUN;
PROC SORT DATA = _stddiff;
    BY _var1 _var2;
RUN;
%* Keep a copy of the current IMBALANCE_NEW as IMBALANCE_OLD before it is
   rebuilt below;
DATA imbalance_old;
    SET imbalance_new;
RUN;
%* First pass, one row per variable1, _var1, variable2 and _var2 with
   IMBALANCE holding the sum of the imbalance flags and STDDIFF holding the
   mean of stddiff. DUM1 and DUM2 receive the other two statistics;
PROC SUMMARY DATA = _stddiff NWAY MISSING;
    CLASS variable1 _var1 variable2 _var2;
    VAR imbalance stddiff;
    OUTPUT OUT  = imbalance_new
           SUM  = imbalance dum1
           MEAN = dum2 stddiff;
RUN;
%* For interaction involving class variable the maximum number and
   maximum mean over categories is taken. The result has one row per _var1
   and _var2, with the maxima stored in IMBALANCE and MAX;
PROC SUMMARY DATA = imbalance_new NWAY MISSING;
    CLASS _var1 _var2;
    VAR imbalance stddiff;
    OUTPUT OUT = imbalance_new
           MAX = imbalance max;
RUN;
%* Macro variable _N_IMBAL_NEW is created with the number of terms (main and
   interaction) in IMBALANCE_NEW that have at least &imbal_nstrata_crit
   imbalanced strata. _MAX_NEW is created with the mean of MAX over all
   rows of IMBALANCE_NEW;
PROC SQL NOPRINT;
    SELECT MEAN(max)
        INTO :_max_new
        FROM imbalance_new;
    SELECT COMPRESS(PUT(COUNT(max), BEST.))
        INTO :_n_imbal_new
        FROM imbalance_new
        WHERE (imbalance >= &imbal_nstrata_crit);
QUIT;
%* If no improvement since last step then remove the term from the
existing terms by removing from dataset ALLINTER and setting
variables IN = 0, OUT = 1 in dataset IMBALANCE.
Select the record from dataset IMBALANCE with the next highest number
of imbalanced