source('//groups/data/CTRHS/Sentinel/Y2 Task orders 2010/CBER-PRISM/MS Activity 12 -Seq Methods/Programs/FDA/20120113/gendata.R')
source('//groups/data/CTRHS/Sentinel/Y2 Task orders 2010/CBER-PRISM/MS Activity 12 -Seq Methods/Programs/FDA/20120113/IPWseqmethods.R')

########################################################################################################
# Example 1: Create a dataset with two confounders and three sites and run an analysis
# Example dataset only, so there is no real relationship among any of the variables

# Outcome
set.seed(274)
y<-rbinom(1000,1,.5)

# Exposure
x<-rbinom(1000,1,.5)

# Confounders
z<-cbind(z1=rbinom(1000,1,.4),z2=rnorm(1000,0,1))

# Unique identifier of site
s<-c(rep(1,500),rep(2,250),rep(3,250))

# Day of exposure
start<-floor(runif(1000,1,721))

# Calculate the IPW estimate and variance
# Aggregated propensity score
# Make site indicator variables
S<-cbind(as.numeric(s==2),as.numeric(s==3))
p<-glm(x~z+S,family=binomial)$fitted.values

# First element is the RD and second is var(RD)
RD.IPW(y,x,cbind(z,S),p)
#             [,1]        [,2]
#[1,] -0.006522214 0.001000946

# Calculate the site-stratified IPW estimate and variance
suniq<-unique(s)
p_s<-NULL
for (i in suniq) {
  # site-specific propensity scores
  p_s[s==i]<-glm(x[s==i]~z[s==i,],family=binomial)$fitted.values
}
RD.IPW_s(y,x,z,s,p_s)
#            estRD        varRD
#[1,] -0.007825543 0.0009947901
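## For intuition only: a minimal sketch of a standard IPW risk-difference point estimate
## using normalized (Hajek-type) weighted means and the pooled propensity score p computed above.
## This illustrates the general technique and is not necessarily the exact formula implemented
## inside RD.IPW, so it need not reproduce the value above to the last digit.
rd_ipw_sketch <- sum(y*x/p)/sum(x/p) - sum(y*(1-x)/(1-p))/sum((1-x)/(1-p))
rd_ipw_sketch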
## USE function IPWseq to assess the observed risk difference and significance
#################################################################################################
## IPWseq : Function to apply the inverse probability weighted (IPW) sequential method when
##          data are combined across sites
# input:  y:     a vector of observed outcomes
#         x:     a vector of the current indicator of exposure
#         z:     a matrix of all confounders (excluding intercept)
#         s:     a vector of unique indicators of site
#         start: a vector of the start times observed up to time t
#         LTime: a vector of times of each look (length 1 if no sequential monitoring)
#         delta: shape parameter for the boundary (delta=0.5 is Pocock-type, delta=0 is
#                O'Brien-Fleming-type)
#         nperm: number of simulations used to calculate the boundary
# output: a list that returns alpha, delta, signal (T or F) for unstratified IPW,
#         endtime (time of signal or end of study) for unstratified IPW,
#         output, a result dataset for unstratified IPW (LTime, RD, ObsStat, Boundary, signal),
#         signal_s (T or F) for stratified IPW,
#         endtime_s (time of signal or end of study) for stratified IPW, and
#         output_s, a result dataset for stratified IPW (LTime, RD, ObsStat, Boundary, signal)
#################################################################################################

## Run a one-time look analysis on the dataset ##
res1a<-IPWseq(y,x,z,s,start,LTime=max(start),alpha=0.05,nperm=5000)
res1a
#$alpha
#[1] 0.05
#$delta
#[1] 0.5
#$signal
#[1] FALSE
#$endtime
#[1] 720
#$output
#     LTime           RD   ObsStat Boundary signal
#[1,]   720 -0.006522214 -0.206153 1.585075      0
#$signal_s
#[1] FALSE
#$endtime_s
#[1] 720
#$output_s
#     LTime           RD    ObsStat Boundary signal
#[1,]   720 -0.007825543 -0.2481126 1.716684      0

## Run a sequential analysis on the dataset, with the first look at day 180 and subsequent looks every 90 days ##
LTime<-c(180,270,360,450,540,630,720)
res1b<-IPWseq(y,x,z,s,start,LTime=LTime,alpha=0.05,nperm=5000)
res1b
#$alpha
#[1] 0.05
#$delta
#[1] 0.5
#$signal
#[1] FALSE
#$endtime
#[1] 720
#$output
#     LTime           RD     ObsStat Boundary signal
#[1,]   180  0.006589373  0.10674138 2.205127      0
#[2,]   270  0.015301872  0.29614820 2.205127      0
#[3,]   360  0.023609614  0.52388933 2.205127      0
#[4,]   450  0.012008981  0.29820297 2.205127      0
#[5,]   540  0.002042208  0.05555735 2.205127      0
#[6,]   630  0.009448319  0.27959023 2.205127      0
#[7,]   720 -0.006522214 -0.20615297 2.205127      0
#$signal_s
#[1] FALSE
#$endtime_s
#[1] 720
#$output_s
#     LTime           RD     ObsStat Boundary signal
#[1,]   180  0.014443438  0.23344576 2.222657      0
#[2,]   270  0.017424974  0.33961038 2.222657      0
#[3,]   360  0.021709494  0.48305627 2.222657      0
#[4,]   450  0.010867758  0.27022278 2.222657      0
#[5,]   540  0.001445813  0.03937034 2.222657      0
#[6,]   630  0.006940681  0.20588088 2.222657      0
#[7,]   720 -0.007825543 -0.24811255 2.222657      0

########################################################################################################
# Example 2: Create a single dataset using scenario c (Two Confounders, Three Sites) and run an analysis
# First use the function datagen to simulate the data

# Number of observations
nobs<-10000
# Event rate in the control group
py0<-0.05
# Proportion exposed
px<-0.5
# Risk difference
RD<-0
# Distribution of sites (equal proportions across the 3 sites)
Site_Dist<-as.matrix(c(1/3,1/3,1/3))
### Coefficients for Scenario C
# logit(P(Y|Z,S)): first two elements are log(OR) for confounders z,
#                  last two elements are log(OR) for site s
Beta_y<-as.matrix(c(log(2),log(2),log(2),log(2)))
# logit(P(X|Z,S)): first two elements are log(OR) for confounders z,
#                  last two elements are log(OR) for site s
Beta_x<-as.matrix(c(log(2),log(2),log(2),log(2)))
# Study length
studylength<-720

## CREATE SAMPLE DATASET ##
dat<-datagen(Nobs=nobs,confound.fun=fun.scenc,Beta_y,Beta_x,Site_Dist,py0=py0,py1=py0+RD,px=px,
             studylength=studylength,Nsim=1000)
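## Optional sanity checks on the simulated dataset (illustration only). The component names
## y, x, s, and start are assumed from how dat is used in the IPWseq calls below.
table(dat$s)/nobs        # site distribution; should be close to Site_Dist
mean(dat$x)              # proportion exposed; should be close to px
mean(dat$y[dat$x==0])    # event rate among the unexposed; should be close to py0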
## USE function IPWseq to assess the observed risk difference and significance
#################################################################################################
## IPWseq : Function to apply the inverse probability weighted (IPW) sequential method when
##          data are combined across sites
# input:  y:     a vector of observed outcomes
#         x:     a vector of the current indicator of exposure
#         z:     a matrix of all confounders (excluding intercept)
#         s:     a vector of unique indicators of site
#         start: a vector of the start times observed up to time t
#         LTime: a vector of times of each look (length 1 if no sequential monitoring)
#         delta: shape parameter for the boundary (delta=0.5 is Pocock-type, delta=0 is
#                O'Brien-Fleming-type)
#         nperm: number of simulations used to calculate the boundary
# output: a list that returns alpha, delta, signal (T or F) for unstratified IPW,
#         endtime (time of signal or end of study) for unstratified IPW,
#         output, a result dataset for unstratified IPW (LTime, RD, ObsStat, Boundary, signal),
#         signal_s (T or F) for stratified IPW,
#         endtime_s (time of signal or end of study) for stratified IPW, and
#         output_s, a result dataset for stratified IPW (LTime, RD, ObsStat, Boundary, signal)
#################################################################################################

## Run a one-time look analysis on the dataset ##
res2a<-IPWseq(dat$y,dat$x,dat$z,dat$s,dat$start,LTime=max(dat$start),alpha=0.05,nperm=5000)
res2a
#$alpha
#[1] 0.05
#$delta
#[1] 0.5
#$signal
#[1] FALSE
#$endtime
#[1] 720
#$output
#     LTime           RD    ObsStat Boundary signal
#[1,]   720 -0.004746932 -0.9996916 1.607027      0
#$signal_s
#[1] FALSE
#$endtime_s
#[1] 720
#$output_s
#     LTime           RD   ObsStat Boundary signal
#[1,]   720 -0.004747051 -1.002246 1.690283      0

## Run a sequential analysis on the dataset, with the first look at day 180 and subsequent looks every 90 days ##
LTime<-c(180,270,360,450,540,630,720)
res2b<-IPWseq(dat$y,dat$x,dat$z,dat$s,dat$start,LTime=LTime,alpha=0.05,nperm=5000)
res2b
#$alpha
#[1] 0.05
#$delta
#[1] 0.5
#$signal
#[1] FALSE
#$endtime
#[1] 720
#$output
#     LTime           RD    ObsStat Boundary signal
#[1,]   180 -0.013132368 -1.3770337 2.167671      0
#[2,]   270 -0.007908363 -1.0014974 2.167671      0
#[3,]   360 -0.006902255 -0.9984872 2.167671      0
#[4,]   450 -0.003994264 -0.6622720 2.167671      0
#[5,]   540 -0.004826854 -0.8816845 2.167671      0
#[6,]   630 -0.002317385 -0.4617267 2.167671      0
#[7,]   720 -0.004746932 -0.9996916 2.167671      0
#$signal_s
#[1] FALSE
#$endtime_s
#[1] 720
#$output_s
#     LTime           RD    ObsStat Boundary signal
#[1,]   180 -0.012956731 -1.3648563 2.208927      0
#[2,]   270 -0.007620142 -0.9744851 2.208927      0
#[3,]   360 -0.006810239 -0.9942885 2.208927      0
#[4,]   450 -0.003964363 -0.6616440 2.208927      0
#[5,]   540 -0.004789514 -0.8790237 2.208927      0
#[6,]   630 -0.002413541 -0.4813137 2.208927      0
#[7,]   720 -0.004747051 -1.0022465 2.208927      0
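## Illustration only: working with the look-by-look result matrix returned above.
## The column names (LTime, signal) are assumed to match the printed output, and the result
## should agree with the endtime element that IPWseq already returns.
sig_looks<-res2b$output[res2b$output[,"signal"]==1,"LTime"]
if (length(sig_looks)==0) res2b$endtime else min(sig_looks)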
##########################################################################################################
# Example 3: Run a simulation to assess performance for a given set of parameters and confounder scenario

# Number of observations
nobs<-10000
# Event rate in the control group
py0<-0.05
# Proportion exposed
px<-0.5
# Risk difference
RD<-0
# Distribution of sites (equal proportions across the 3 sites)
Site_Dist<-as.matrix(c(1/3,1/3,1/3))
### Coefficients for Scenario C
# logit(P(Y|Z,S)): first two elements are log(OR) for confounders z,
#                  last two elements are log(OR) for site s
Beta_y<-as.matrix(c(log(2),log(2),log(2),log(2)))
# logit(P(X|Z,S)): first two elements are log(OR) for confounders z,
#                  last two elements are log(OR) for site s
Beta_x<-as.matrix(c(log(2),log(2),log(2),log(2)))

# Run code for assessing one simulation scenario
#################################################################################################
## simrun : Function to run a simulation evaluation of a single scenario
# input:  Nobs:         number of total observations
#         LTime:        a vector of times of each look (length 1 if no sequential monitoring)
#         confound.fun: name of the function specifying the confounder distribution (examples:
#                       fun.scena, fun.scenb, fun.scenc, fun.scend)
#         Beta_y:       a vector of log(OR) denoting the strength of confounding between
#                       z (confounders) and s (sites as indicator variables) on the outcome
#         Beta_x:       a vector of log(OR) denoting the strength of confounding between
#                       z (confounders) and s (sites as indicator variables) on x
#         Site_Dist:    a vector of the proportion of observations per site (e.g. c(1/3,1/3,1/3))
#         py0:          marginal probability of outcome y if x=0, P(Y|X=0)
#         py1:          marginal probability of outcome y if x=1, P(Y|X=1)
#         px:           marginal probability of x across all sites
#         Nsim:         number of simulations to use in the permutation test (larger is better)
#         sumoutput:    if TRUE, provides summary output such as power, mean time to signal
#                       (sigtime), and mean time to study end (studyendtime); if FALSE, provides
#                       per-simulation results: whether each simulation signaled (signal) and the
#                       minimum of study end or time of signal (endtime)
# output: sumoutput=T: a list that returns py0, py1, RD (risk difference), px, Beta_y, Beta_x,
#                      Site_Dist, outputIPW (power, sigtime, studyendtime) for unstratified IPW,
#                      and outputIPW_s (power, sigtime, studyendtime) for the stratified IPW estimator
#         sumoutput=F: a list that returns py0, py1, RD (risk difference), px, Beta_y, Beta_x,
#                      Site_Dist, outputIPWall (signal, endtime) for unstratified IPW,
#                      and outputIPW_sall (signal, endtime) for the stratified IPW estimator
#################################################################################################

# One-time look (should take under 4 hours)
set.seed(274)
sim1<-simrun(Nobs=nobs,LTime=720,confound.fun=fun.scenc,Beta_y,Beta_x,Site_Dist,py0=py0,py1=py0+RD,px=px,Nsim=1000,sumoutput=T)
sim1
#$py0
#[1] 0.05
#$py1
#[1] 0.05
#$RD
#[1] 0
#$px
#[1] 0.5
#$Beta_y
#          [,1]
#[1,] 0.6931472
#[2,] 0.6931472
#[3,] 0.6931472
#[4,] 0.6931472
#$Beta_x
#          [,1]
#[1,] 0.6931472
#[2,] 0.6931472
#[3,] 0.6931472
#[4,] 0.6931472
#$Site_Dist
#          [,1]
#[1,] 0.3333333
#[2,] 0.3333333
#[3,] 0.3333333
#$outputIPW
#     power sigtime studyendtime
#[1,] 0.052     720          720
#$outputIPW_s
#     power sigtime studyendtime
#[1,] 0.054     720          720

# Sequential look (should take under 10 hours)
set.seed(274)
sim2<-simrun(Nobs=nobs,LTime=LTime,confound.fun=fun.scenc,Beta_y,Beta_x,Site_Dist,py0=py0,py1=py0+RD,px=px,Nsim=1000,sumoutput=T)
sim2
#$py0
#[1] 0.05
#$py1
#[1] 0.05
#$RD
#[1] 0
#$px
#[1] 0.5
#$Beta_y
#          [,1]
#[1,] 0.6931472
#[2,] 0.6931472
#[3,] 0.6931472
#[4,] 0.6931472
#$Beta_x
#          [,1]
#[1,] 0.6931472
#[2,] 0.6931472
#[3,] 0.6931472
#[4,] 0.6931472
#$Site_Dist
#          [,1]
#[1,] 0.3333333
#[2,] 0.3333333
#[3,] 0.3333333
#$outputIPW
#     power  sigtime studyendtime
#[1,] 0.051 312.3529       699.21
#$outputIPW_s
#     power sigtime studyendtime
#[1,]  0.05   316.8       699.84
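## Illustration only: with sumoutput=F, simrun returns the per-simulation results documented
## above (outputIPWall and outputIPW_sall, each containing signal and endtime), so summaries such
## as empirical power can be recomputed by hand. The column names are assumed from that
## documentation, and this call re-runs the full simulation, so expect a run time similar to sim2.
sim2all<-simrun(Nobs=nobs,LTime=LTime,confound.fun=fun.scenc,Beta_y,Beta_x,Site_Dist,py0=py0,py1=py0+RD,px=px,Nsim=1000,sumoutput=F)
mean(sim2all$outputIPWall[,"signal"])      # empirical power (type I error rate here, since RD=0)
mean(sim2all$outputIPW_sall[,"signal"])    # same, for the stratified IPW estimator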