standard bias. This term
will be added in next step;
%IF NOT(&&_n_imbal_new < &_n_imbal) %THEN
%DO;
  %* No improvement in the number of imbalanced terms. Undo this step and
     pick another candidate term;
  %LET _added = NOT ADDED;

  %* Remove the interaction rows that were queued for the current iteration;
  DATA allinter;
    SET allinter;
    IF iter = &count THEN
      DELETE;
  RUN;

  %* Build a one-row flag dataset from the first row of IMBALANCE_OLD,
     marked as OUT and not IN;
  DATA imbalance_out (KEEP = _var1 _var2 in out);
    SET imbalance_old (OBS = 1);
    in = 0;
    out = 1;
  RUN;

  %* Transfer the flags onto the matching pair in IMBALANCE;
  DATA imbalance;
    MERGE imbalance
          imbalance_out;
    BY _var1 _var2;
  RUN;

  %* Order rows by the OUT and IN flags first, then by largest IMBALANCE
     and largest MAX;
  PROC SORT DATA = imbalance;
    BY out in DESCENDING imbalance DESCENDING max;
  RUN;

  %* Next candidate is the first row that meets the entry criterion and
     carries neither flag;
  DATA imbalance_new (DROP = in out);
    SET imbalance (WHERE = (imbalance >= &entry_nstrata_crit
                            AND NOT in
                            AND NOT out)
                   OBS = 1);
    IF in OR out THEN
      DELETE;
  RUN;
%END;
%* If there is an improvement since the last step, add the term to the
terms that stay in the model: in dataset IMBALANCE the variable IN is set
to 1. Macro variable _N_IMBAL is updated to &_N_IMBAL_NEW. Dataset
IMBALANCE_NEW is created with the next term to be added.;
%ELSE
%DO;
  %LET _added = ADDED;

  %* Snapshot IMBALANCE_NEW, tagged with the current step number;
  DATA imbalance_keep;
    SET imbalance_new;
    step = &count;
  RUN;

  %* Refresh MAX and IMBALANCE in IMBALANCE from IMBALANCE_NEW (interaction
     rows only, that is rows with a non-blank _var2) and set IN = 1 for the
     pair found in the first row of IMBALANCE_OLD. OUT is reset to missing
     on every row;
  DATA imbalance;
    MERGE imbalance (DROP = max imbalance)
          imbalance_new (KEEP = _var1 _var2 max imbalance
                         WHERE = (_var2 ^= ' '))
          imbalance_old (KEEP = _var1 _var2 IN = innew OBS = 1);
    BY _var1 _var2;
    out = .;
    IF innew THEN
      in = 1;
  RUN;

  %* Carry the new counts forward as the reference for the next step;
  %LET _n_imbal = &_n_imbal_new;
  %LET _max = &&_max_new;

  %* Distinct pairs that are already flagged IN or OUT;
  PROC SORT DATA = imbalance (WHERE = (in OR out)) OUT =
    imbalance_prev (KEEP = _var1 _var2) NODUPKEY;
    BY _var1 _var2;
  RUN;

  %* Pass 1: keep candidate interactions that meet the entry criterion and
     whose pair is not already flagged. _var1 and _var2 are then swapped so
     that the second pass can test the reversed pair;
  DATA imbalance_new;
    MERGE imbalance_prev (IN = inp)
          imbalance_new (WHERE = (_var2 ^= ' ' AND imbalance >=
                                  &entry_nstrata_crit));
    BY _var1 _var2;
    IF NOT inp;
    keep = _var1;
    _var1 = _var2;
    _var2 = keep;
    DROP keep;
  RUN;

  %* The swap changed the key values, so re-sort before merging again;
  PROC SORT DATA = imbalance_new;
    BY _var1 _var2;
  RUN;

  %* Pass 2: same filter on the reversed pair. The second swap restores the
     original orientation of _var1 and _var2;
  DATA imbalance_new;
    MERGE imbalance_prev (IN = inp)
          imbalance_new (WHERE = (_var2 ^= ' ' AND imbalance >=
                                  &entry_nstrata_crit));
    BY _var1 _var2;
    IF NOT inp;
    keep = _var1;
    _var1 = _var2;
    _var2 = keep;
    DROP keep;
  RUN;

  %* Select the interaction with the highest sum of std.diffs (ties are
     broken by MAX). After the ascending sort it is the last row. This one
     is the one to add;
  PROC SORT DATA = imbalance_new;
    BY imbalance max;
  RUN;
  DATA imbalance_new;
    SET imbalance_new END = last;
    IF last;
  RUN;
%END;
%* If the interaction term involves one or two class variables, get all
   indicator variables to add to the model;

%* Distinct interaction rows of _STDDIFF with their indicator names
   (variable1, variable2) and the variables matching the vartype prefix;
PROC SORT NODUPKEY DATA = _stddiff (KEEP = _var1 variable1 _var2
  variable2 vartype: WHERE = (_var2 ^= ' ')) OUT = _vars;
  BY _var1 _var2 variable1 variable2;
RUN;

%* Expand the selected term to one row per indicator combination. Only
   pairs present in IMBALANCE_NEW are kept;
DATA imbalance_new;
  MERGE _vars imbalance_new (IN = in);
  BY _var1 _var2;
  IF in;
RUN;

%* When the second variable is a class variable, drop the last variable2
   row within each variable1 group.
   NOTE(review): presumably this removes the reference level - confirm;
DATA imbalance_new;
  SET imbalance_new;
  BY _var1 _var2 variable1 variable2;
  IF vartype2 = 'C' AND LAST.variable1 THEN
    DELETE;
RUN;

%* Same for the first variable: sort with the second variable leading and
   drop the last variable1 row within each variable2 group;
PROC SORT DATA = imbalance_new;
  BY _var2 _var1 variable2 variable1;
RUN;
DATA imbalance_new;
  SET imbalance_new;
  BY _var2 _var1 variable2 variable1;
  IF vartype1 = 'C' AND LAST.variable2 THEN
    DELETE;
RUN;

%* Put IMBALANCE_NEW in its final order and sort IMBALANCE by the pair
   keys, the order needed by BY-merges on _var1 _var2;
PROC SORT DATA = imbalance_new;
  BY _var1 variable1 _var2 variable2;
RUN;
PROC SORT DATA = imbalance;
  BY _var1 _var2;
RUN;
%* Finalize IMBALANCE_NEW and check whether there are any more terms to add.
   NEW_N_INTER is preset to 0 because the DATA step below never reaches the
   CALL SYMPUT when IMBALANCE_NEW has no rows;
%LET new_n_inter = 0;
DATA imbalance_new;
  SET imbalance_new END = last;
  %* On the last row _n_ equals the number of rows read;
  IF last THEN
    CALL SYMPUT('new_n_inter', COMPRESS(PUT(_n_, BEST.)));
RUN;
%* Append the rows of IMBALANCE_NEW to ALLINTER. The appended rows get
   ITER set to the number of the next step, existing rows are unchanged;
DATA allinter;
  SET allinter
      imbalance_new (IN = in);
  IF in THEN
  DO;
    iter = &count + 1;
  END;
RUN;

%* Write a one-line progress message for this step to the log;
%PUT STEP &count: #imbalanced: &_n_imbal - &&_ibint&_nibint &_added;
%END;
%* Check