K = K + 1;
END;
END;
END;
END;
END;
MV_FINAL = MV || MV_POOL;
VARNAMES={‘MV’ ‘MV_POOL’};
CREATE MVPOOL FROM MV_FINAL[COLNAME=VARNAMES];
APPEND FROM MV_FINAL;
QUIT;
PROC SORT DATA = MVPOOL;
BY MV;
RUN;
PROC SORT DATA = MS;
BY MV;
RUN;
/* The variable MVPOOL in the &OUTDATA set indicates the pooled missingness
pattern */
DATA &OUTDATA(RENAME=(MV=MP_ORIG MV_POOL=MP));
MERGE MS MVPOOL;
BY MV;
RUN;
%MEND MP_ASSIGN;
Program 4.3: The Missingness Pattern (MP) Imputation
************************************************************************
* MISSINGNESS PATTERN (MP) METHOD *
* This program uses Proc PSMATCH to estimate propensity scores using the *
* missing pattern approach. This code calls the macro MP_ASSIGN *
* (Program 4.2) which produces the dataset DAT_MP with the pooled *
* missing patterns. *
************************************************************************;
/* Covariates passed to MP_ASSIGN and mean-imputed within each pooled
   missingness pattern below. */
%let VARLIST=
Age BMI_B BPIInterf_B BPIPain_B CPFQ_B FIQ_B GAD7_B ISIX_B PHQ8_B
PhysicalSymp_B SDS_B DXdur;
/* Number of covariates in VARLIST; sizes the XM1-XM&NVAR mean variables so
   the list can change without editing hard-coded array bounds. */
%let NVAR = %sysfunc(countw(&VARLIST, %str( )));
%MP_ASSIGN(MSDATA = REFL2, OUTDATA = DAT_MP, VARLIST = &VARLIST, N_MP_MIN = 100);
/* MP_ASSIGN builds its output with MERGE ... BY MV, so DAT_MP is ordered by
   the original pattern (MP_ORIG), not by the pooled pattern MP. BY-group
   processing on MP needs MP order; sort into a separate data set so DAT_MP
   itself is left untouched for later programs. */
PROC SORT DATA = DAT_MP OUT = DAT_MP_BYMP;
BY MP;
RUN;
/* One row per pooled pattern holding the within-pattern mean of each
   covariate (XM1..XM&NVAR, in VARLIST order). */
PROC MEANS DATA = DAT_MP_BYMP NOPRINT;
VAR &VARLIST;
OUTPUT OUT = MN MEAN = XM1-XM&NVAR;
BY MP;
RUN;
/* Attach the pattern means to every subject and replace each missing
   covariate value with the mean for that subject's pooled pattern. A
   covariate whose mean is itself missing in MN stays missing.
   TEMP is written in MP order by the BY-merge, so no further sort is needed. */
DATA TEMP;
MERGE DAT_MP_BYMP MN;
BY MP;
ARRAY X{*} &VARLIST;
ARRAY XM{*} XM1-XM&NVAR;
DO I = 1 TO DIM(X);
IF X{I} = . THEN X{I} = XM{I};
END;
DROP I;
RUN;
/* Estimate propensity scores separately within each pooled missingness
   pattern (one PROC PSMATCH call per value of MP), then stack the results.
   The code handles exactly two pooled patterns (MP=1 and MP=2); add a
   further call and SET entry if MP_ASSIGN yields more. */

/* Pattern 1: full covariate set. */
PROC PSMATCH DATA = TEMP REGION=ALLOBS;
CLASS COHORT Gender Race Dr_Rheum Dr_PrimCare;
PSMODEL COHORT(TREATED='OPIOID') = Gender Race Dr_Rheum Dr_PrimCare
Age BMI_B BPIInterf_B BPIPain_B CPFQ_B FIQ_B GAD7_B ISIX_B PHQ8_B
PhysicalSymp_B SDS_B DXdur;
/* Written to DAT_PS_MP1 so the final DATA step can read it. */
OUTPUT OUT = DAT_PS_MP1 PS = _PS_;
WHERE MP=1;
RUN;

/* Pattern 2: same model without DXdur.
   NOTE(review): presumably DXdur is missing for this pattern - confirm. */
PROC PSMATCH DATA = TEMP REGION=ALLOBS;
CLASS COHORT Gender Race Dr_Rheum Dr_PrimCare;
PSMODEL COHORT(TREATED='OPIOID') = Gender Race Dr_Rheum Dr_PrimCare
Age BMI_B BPIInterf_B BPIPain_B CPFQ_B FIQ_B GAD7_B ISIX_B PHQ8_B
PhysicalSymp_B SDS_B;
OUTPUT OUT = DAT_PS_MP2 PS = _PS_;
WHERE MP=2;
RUN;

/* Stack the per-pattern results into a single data set of propensity scores. */
DATA DAT_PS_MP;
SET DAT_PS_MP1 DAT_PS_MP2;
RUN;
Programs 4.2 and 4.4 together implement the MIMP approach for propensity score estimation. After the missingness patterns have been created using Program 4.2, Program 4.4 uses PROC MI to impute the missing covariate values and PROC PSMATCH to estimate the propensity score. Note the variable MP in the PSMODEL statement, which is the key to implementing the MIMP approach.
Program 4.4: Multiple Imputation Missing Pattern (MIMP) Imputation
**********************************************************************;
* Multiple Imputation Missingness Pattern (MIMP) Method;
**********************************************************************;
/* Step 1: multiply impute the covariates in &VARLIST (plus BPIPain_LOCF)
   from DAT_MP, the output of MP_ASSIGN (Program 4.2). NIMPUTE=100 requests
   100 imputations, SEED fixes the random number stream, and ROUND=.001
   rounds imputed values to three decimals. The imputations are stacked in
   DAT_MIMP. */
PROC MI DATA = DAT_MP ROUND=.001 NIMPUTE=100 SEED=123456 OUT=DAT_MIMP NOPRINT;
VAR &VARLIST BPIPain_LOCF;
RUN;
/* Step 2: estimate the propensity score separately within each imputation
   (BY _IMPUTATION_). The pooled missingness pattern MP enters the model as
   a classification covariate; this is what makes the approach MIMP.
   NOTE(review): GENDER and RACE are listed in CLASS but do not appear in
   the PSMODEL statement - confirm whether they were meant to be included. */
PROC PSMATCH DATA = DAT_MIMP REGION=ALLOBS;
CLASS COHORT MP GENDER RACE;
PSMODEL COHORT(TREATED='OPIOID') = &VARLIST MP;
OUTPUT OUT = DAT_PS_MIMP PS = _PS_;
BY _IMPUTATION_;
RUN;
4.2.3 Selection of Propensity Score Estimation Model
Once the covariates have been selected and methods for addressing any missing covariate data have been applied, several statistical models can be used to estimate the propensity scores. The most common approach has been the use of logistic regression to model the binary intervention (treated or control) selection as a function of the measured covariates:
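$$\operatorname{logit}\{P(Z=1\mid X)\} = \log\!\left(\frac{P(Z=1\mid X)}{1-P(Z=1\mid X)}\right) = \beta_0 + \beta_1 X_1 + \cdots + \beta_p X_p$$
where $Z$ is the intervention indicator ($Z=1$ for treated, $Z=0$ for control), $X=(X_1,\ldots,X_p)$ is the vector of measured covariates, and $\beta_0,\ldots,\beta_p$ are the regression coefficients. The estimated propensity score is the fitted probability $\hat{P}(Z=1\mid X)$.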
A Priori Logistic Regression Model
The first approach is to fit a logistic regression model a priori, that is, to identify the covariates in the model and fix the model before estimating the propensity score. The main advantage of an a priori model is that it allows researchers to incorporate knowledge external to the data into the model building. For example, if there is evidence that a covariate is correlated with the treatment assignment, then this covariate should be included in the model even if the association between this covariate and the treatment is not strong in the current data. In addition, the a priori model is easy to interpret. The DAG approach can be very informative when building a logistic propensity score model a priori, as it clearly shows the relationships between covariates and interventions.