> 
> # enter data by hand (p. 515)
> id <- 1:48
> gender <- gl(2, 24, labels = c("Female", "Male"))
> alcohol <- gl(3, 8, 48, labels = c("None", "2 Pints", "4 Pints"))
> attractiveness <- c(65,70,60,60,60,55,60,55,70,65,60,70,65,60,60,
+                     50,55,65,70,55,55,60,50,50,50,55,80,65,70,75,
+                     75,65,45,60,85,65,70,70,80,60,30,30,30,55,35,20,45,40)
> 
> gogglesData <- data.frame(gender, alcohol, attractiveness)
> 
> 
> ###################### regular, non-Bayesian analyses of the data ######################
> 
> # conventional factorial ANOVA (pp. 519-520)
> 
> # Anova() comes from the car package, and effect() & allEffects() come from the
> # effects package; both must be loaded first, or the calls below will fail with
> # "could not find function" errors
> library(car)
> library(effects)
> 
> contrasts(gogglesData$alcohol) <- cbind(c(-2, 1, 1), c(0, -1, 1))
> contrasts(gogglesData$gender) <- c(-1, 1)
> gogglesModel <- aov(attractiveness ~ gender + alcohol + gender:alcohol, data = gogglesData)
> Anova(gogglesModel, type = "III")
> 
> genderEffect <- effect("gender", gogglesModel)
> summary(genderEffect)
> alcoholEffect <- effect("alcohol", gogglesModel)
> summary(alcoholEffect)
> interactionMeans <- allEffects(gogglesModel)
> summary(interactionMeans)
> 
> summary.lm(gogglesModel)

Call:
aov(formula = attractiveness ~ gender + alcohol + gender:alcohol,
    data = gogglesData)

Residuals:
    Min      1Q  Median      3Q     Max 
-21.875  -5.625  -0.625   5.156  19.375 

Coefficients:
                 Estimate Std. Error t value Pr(>|t|)    
(Intercept)        58.333      1.315  44.351  < 2e-16 ***
gender1            -1.875      1.315  -1.426 0.161382    
alcohol1           -2.708      0.930  -2.912 0.005727 ** 
alcohol2           -9.062      1.611  -5.626 1.37e-06 ***
gender1:alcohol1   -2.500      0.930  -2.688 0.010258 *  
gender1:alcohol2   -6.562      1.611  -4.074 0.000201 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 9.112 on 42 degrees of freedom
Multiple R-squared:  0.6111,    Adjusted R-squared:  0.5648 
F-statistic:  13.2 on 5 and 42 DF,  p-value: 9.609e-08
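
# A visual companion to the ANOVA above (a suggested addition, not part of the
# original session): base R's interaction.plot() draws the gender x alcohol cell
# means, which makes the interaction reported by summary.lm easy to see.
with(gogglesData, interaction.plot(alcohol, gender, attractiveness,
                                   xlab = "Alcohol consumed",
                                   ylab = "Mean attractiveness",
                                   trace.label = "Gender"))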
> 
> 
> ###################### Bayesian factorial ANOVA using MCMCglmm ######################
> 
> library(MCMCglmm)   # needed for the MCMCglmm & simulate.MCMCglmm functions below
> 
> # the MCMCglmm function does not recognize the above contrasts for alcohol & gender,
> # so the contrast codes will be extracted and used in place of the factor variables
> # in the MCMCglmm analyses
> contrast_codes <- model.matrix(~ gogglesData$alcohol + gogglesData$gender +
+                                  gogglesData$alcohol:gogglesData$gender)
> gogglesData$alcohol_contrast1 <- contrast_codes[,2]
> gogglesData$alcohol_contrast2 <- contrast_codes[,3]
> gogglesData$gender_contrast <- contrast_codes[,4]
> gogglesData$alc_gender_contrast1 <- contrast_codes[,5]
> gogglesData$alc_gender_contrast2 <- contrast_codes[,6]
> 
> model1 <- MCMCglmm(attractiveness ~ alcohol_contrast1 + alcohol_contrast2 + gender_contrast +
+                    alc_gender_contrast1 + alc_gender_contrast2, data=gogglesData, verbose=F,
+                    nitt=50000, burnin=3000, thin=10)
> summary(model1)

 Iterations = 3001:49991
 Thinning interval  = 10
 Sample size  = 4700 

 DIC: 356.307 

 R-structure:  ~units

      post.mean l-95% CI u-95% CI eff.samp
units     87.62    50.63    125.6     4406

 Location effects: attractiveness ~ alcohol_contrast1 + alcohol_contrast2 + gender_contrast + alc_gender_contrast1 + alc_gender_contrast2 

                     post.mean l-95% CI u-95% CI eff.samp    pMCMC    
(Intercept)            58.3013  55.7684  61.1002     4188  < 2e-04 ***
alcohol_contrast1      -2.6768  -4.5891  -0.9190     4700 0.006809 ** 
alcohol_contrast2      -9.0563 -12.2616  -5.8208     4700  < 2e-04 ***
gender_contrast        -1.8610  -4.5358   0.7859     4790 0.165957    
alc_gender_contrast1   -2.5045  -4.3976  -0.6594     5239 0.011064 *  
alc_gender_contrast2   -6.5534  -9.8195  -3.2800     4700 0.000851 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

> autocorr(model1$VCV)
, , units

               units
Lag 0    1.000000000
Lag 10   0.032137070
Lag 50   0.005159442
Lag 100 -0.020578371
Lag 500 -0.009006498

> plot(model1)
Hit <Return> to see next plot: 
Hit <Return> to see next plot: 
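
# model1$Sol (the fixed-effect draws) is a coda mcmc object, so the coda package
# (loaded along with MCMCglmm) can summarize it directly. A minimal sketch to
# complement the pMCMC values above; the 0.95 probability level is an assumption:
HPDinterval(model1$Sol, prob = 0.95)   # highest posterior density intervals
effectiveSize(model1$Sol)              # effective sample sizes for the fixed effects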
> 
> # running the same analyses using different priors, & then plotting the various
> # posteriors for "alcohol_contrast1"
> 
> estimateNum = 2   # the number of the regression estimate to be used for the analyses,
>                   # e.g., the 1st (intercept), the 2nd, etc.
> Ndatasets = 5     # the number of additional analyses using different priors
> 
> # plot data for the first Bayes model
> densdat <- density(model1$Sol[,estimateNum])
> plotdatx <- densdat$x
> plotdaty <- densdat$y
> 
> # for MCMCglmm priors, B is for fixed effects. B is a list containing the expected
> # value (mu) and a (co)variance matrix (V) representing the strength of belief:
> # the defaults are B$mu=0 and B$V=I*1e+10, where I is an identity matrix of
> # appropriate dimension
> 
> # the different priors will be derived from the lm, non-Bayesian regression coefficients:
> # the mu values will be random values from a 4 SE range around the lm estimates,
> # and the V values will be set at (4 * the SEs)**2
> 
> fit.list <- summary.lm(gogglesModel)
> coeffs <- fit.list$coefficients
> coefnames <- rownames(coeffs)
> 
> # ranges around the lm estimates
> MUranges <- cbind( (coeffs[,1] - (4 * coeffs[,2])), (coeffs[,1] + (4 * coeffs[,2])) )
> 
> # random values from the MUranges for each coefficient
> MUs <- matrix(-9999, nrow(coeffs), Ndatasets)
> for (lupe in 1:nrow(coeffs)) { MUs[lupe,] <- runif(Ndatasets, min=MUranges[lupe,1], max=MUranges[lupe,2]) }
> dimnames(MUs) <- list(rep("", dim(MUs)[1]))
> colnames(MUs) <- 1:Ndatasets
> cat('\n\nPrior regression coefficient estimates for each additional analysis:')

Prior regression coefficient estimates for each additional analysis:
> print(round(MUs,2))
      1      2     3     4      5
  55.36  55.02 62.36 54.24  57.92
  -3.02  -0.98 -6.89 -0.83  -0.38
  -3.60  -5.30 -5.11 -5.81  -4.59
  -5.96 -10.42 -9.66 -3.20  -5.88
  -2.97  -0.08 -1.24 -4.78  -1.74
 -11.93  -6.19 -2.77 -0.83 -11.95
> 
> # (co)variance matrix (V) representing the strength of belief, which will be (4 * the lm SEs)**2
> Vlmx2 <- diag((4 * coeffs[,2])**2)
> colnames(Vlmx2) <- 1:nrow(MUs)
> cat('\n\nPrior variance (strength of belief) value used for each regression estimate in the additional analyses:\n',
+     round(diag(Vlmx2),5), sep="\n")

Prior variance (strength of belief) value used for each regression estimate in the additional analyses:

27.67857
27.67857
13.83929
41.51786
13.83929
41.51786
> 
> for (lupeSets in 1:Ndatasets) {
+   prior.list <- list(B = list(mu = MUs[,lupeSets], V = Vlmx2))
+   model2 <- MCMCglmm(attractiveness ~ alcohol_contrast1 + alcohol_contrast2 + gender_contrast +
+                      alc_gender_contrast1 + alc_gender_contrast2, data=gogglesData,
+                      prior = prior.list, nitt=50000, burnin=3000, thin=10, verbose=F)
+ 
+   densdat <- density(model2$Sol[,estimateNum])
+   plotdatx <- cbind( plotdatx, densdat$x)
+   plotdaty <- cbind( plotdaty, densdat$y)
+ }
> 
> matplot( plotdatx, plotdaty, main="", xlab=paste('Estimate for', coefnames[estimateNum]), ylab='Density',
+          font.lab=1.8, type='l', cex.lab=1.2, col=c(2,rep(1,ncol(plotdatx))), lty=1, lwd=1 )
> title(main=paste('Density Plots of the Posteriors for', coefnames[estimateNum], '\nProduced Using Differing Priors'))
> legend("topright", c("broad priors","random priors"), bty="n", lty=c(1,1),
+        lwd=2, cex=1.2, text.col=c(2,1), col=c(2,1) )
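
# A numeric companion to the density plot (a suggested addition; it assumes that
# model2 from the final loop iteration is still in the workspace): compare the
# posterior mean for the chosen estimate under the broad default prior with the
# one produced by the last set of random informative priors.
mean(model1$Sol[, estimateNum])   # broad (default) prior
mean(model2$Sol[, estimateNum])   # final random informative prior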
> 
> 
> ################ posterior predictive checks using the bayesplot package ################
> 
> # from the bayesplot package documentation:
> # The bayesplot package provides various plotting functions for graphical posterior
> # predictive checking, that is, creating graphical displays comparing observed data
> # to simulated data from the posterior predictive distribution. The idea behind
> # posterior predictive checking is simple: if a model is a good fit then we should
> # be able to use it to generate data that looks a lot like the data we observed.
> 
> library(bayesplot)   # needed for the ppc_* functions below
> 
> y <- gogglesData$attractiveness   # the DV that was used for the MCMCglmm model
> 
> # generating the simulated data using the simulate.MCMCglmm function from the MCMCglmm package
> yrep <- simulate.MCMCglmm(model1, 25)
> 
> yrep <- t( yrep )   # transposing yrep because the bayesplot functions expect draws in rows
> 
> # ppc_stat: a histogram of the distribution of a test statistic computed by applying
> # stat to each dataset (row) in yrep; the value of the statistic in the observed
> # data, stat(y), is overlaid as a vertical line
> ppc_stat(y, yrep, binwidth = 1)
> 
> # ppc_dens_overlay: kernel density or empirical CDF estimates of each dataset (row)
> # in yrep are overlaid, with the distribution of y itself on top (and in a darker shade)
> ppc_dens_overlay(y, yrep[1:25, ])
> 
> # ppc_scatter_avg: a scatterplot of y against the average values of yrep, i.e., the
> # points (mean(yrep[, n]), y[n]), where each yrep[, n] is a vector of length equal
> # to the number of posterior draws
> ppc_scatter_avg(y, yrep)
> 
> # ppc_hist: a separate histogram, shaded frequency polygon, smoothed kernel density
> # estimate, or box and whiskers plot is displayed for y and each dataset (row) in
> # yrep; for these plots yrep should therefore contain only a small number of rows
> ppc_hist(y, yrep[1:8, ], binwidth = .3)
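
# The same style of check can be run separately per group: bayesplot's
# ppc_stat_grouped() splits the test-statistic histograms by a grouping factor.
# A minimal sketch (grouping by gender is my choice, not part of the original session):
ppc_stat_grouped(y, yrep, group = gogglesData$gender, binwidth = 1)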
> 
> 
> ###################### now conduct the Bayesian analyses using the rstanarm package ######################
> 
> library(rstanarm)   # needed for the stan_glm function below
> 
> model10 <- stan_glm(attractiveness ~ gender + alcohol + gender:alcohol, data = gogglesData,
+                     warmup = 1000, iter = 2000, sparse = FALSE, seed = 123)

SAMPLING FOR MODEL 'continuous' NOW (CHAIN 1).

Gradient evaluation took 6.7e-05 seconds
1000 transitions using 10 leapfrog steps per transition would take 0.67 seconds.
Adjust your expectations accordingly!

Iteration:    1 / 2000 [  0%]  (Warmup)
Iteration:  200 / 2000 [ 10%]  (Warmup)
Iteration:  400 / 2000 [ 20%]  (Warmup)
Iteration:  600 / 2000 [ 30%]  (Warmup)
Iteration:  800 / 2000 [ 40%]  (Warmup)
Iteration: 1000 / 2000 [ 50%]  (Warmup)
Iteration: 1001 / 2000 [ 50%]  (Sampling)
Iteration: 1200 / 2000 [ 60%]  (Sampling)
Iteration: 1400 / 2000 [ 70%]  (Sampling)
Iteration: 1600 / 2000 [ 80%]  (Sampling)
Iteration: 1800 / 2000 [ 90%]  (Sampling)
Iteration: 2000 / 2000 [100%]  (Sampling)

 Elapsed Time: 0.068937 seconds (Warm-up)
               0.112595 seconds (Sampling)
               0.181532 seconds (Total)

SAMPLING FOR MODEL 'continuous' NOW (CHAIN 2).

Gradient evaluation took 1.3e-05 seconds
1000 transitions using 10 leapfrog steps per transition would take 0.13 seconds.
Adjust your expectations accordingly!

Iteration:    1 / 2000 [  0%]  (Warmup)
Iteration:  200 / 2000 [ 10%]  (Warmup)
Iteration:  400 / 2000 [ 20%]  (Warmup)
Iteration:  600 / 2000 [ 30%]  (Warmup)
Iteration:  800 / 2000 [ 40%]  (Warmup)
Iteration: 1000 / 2000 [ 50%]  (Warmup)
Iteration: 1001 / 2000 [ 50%]  (Sampling)
Iteration: 1200 / 2000 [ 60%]  (Sampling)
Iteration: 1400 / 2000 [ 70%]  (Sampling)
Iteration: 1600 / 2000 [ 80%]  (Sampling)
Iteration: 1800 / 2000 [ 90%]  (Sampling)
Iteration: 2000 / 2000 [100%]  (Sampling)

 Elapsed Time: 0.067907 seconds (Warm-up)
               0.079539 seconds (Sampling)
               0.147446 seconds (Total)

SAMPLING FOR MODEL 'continuous' NOW (CHAIN 3).

Gradient evaluation took 1.3e-05 seconds
1000 transitions using 10 leapfrog steps per transition would take 0.13 seconds.
Adjust your expectations accordingly!

Iteration:    1 / 2000 [  0%]  (Warmup)
Iteration:  200 / 2000 [ 10%]  (Warmup)
Iteration:  400 / 2000 [ 20%]  (Warmup)
Iteration:  600 / 2000 [ 30%]  (Warmup)
Iteration:  800 / 2000 [ 40%]  (Warmup)
Iteration: 1000 / 2000 [ 50%]  (Warmup)
Iteration: 1001 / 2000 [ 50%]  (Sampling)
Iteration: 1200 / 2000 [ 60%]  (Sampling)
Iteration: 1400 / 2000 [ 70%]  (Sampling)
Iteration: 1600 / 2000 [ 80%]  (Sampling)
Iteration: 1800 / 2000 [ 90%]  (Sampling)
Iteration: 2000 / 2000 [100%]  (Sampling)

 Elapsed Time: 0.064448 seconds (Warm-up)
               0.078177 seconds (Sampling)
               0.142625 seconds (Total)

SAMPLING FOR MODEL 'continuous' NOW (CHAIN 4).

Gradient evaluation took 1.2e-05 seconds
1000 transitions using 10 leapfrog steps per transition would take 0.12 seconds.
Adjust your expectations accordingly!

Iteration:    1 / 2000 [  0%]  (Warmup)
Iteration:  200 / 2000 [ 10%]  (Warmup)
Iteration:  400 / 2000 [ 20%]  (Warmup)
Iteration:  600 / 2000 [ 30%]  (Warmup)
Iteration:  800 / 2000 [ 40%]  (Warmup)
Iteration: 1000 / 2000 [ 50%]  (Warmup)
Iteration: 1001 / 2000 [ 50%]  (Sampling)
Iteration: 1200 / 2000 [ 60%]  (Sampling)
Iteration: 1400 / 2000 [ 70%]  (Sampling)
Iteration: 1600 / 2000 [ 80%]  (Sampling)
Iteration: 1800 / 2000 [ 90%]  (Sampling)
Iteration: 2000 / 2000 [100%]  (Sampling)

 Elapsed Time: 0.066748 seconds (Warm-up)
               0.086594 seconds (Sampling)
               0.153342 seconds (Total)

> 
> summary(model10, digits = 3)

Model Info:

 function:     stan_glm
 family:       gaussian [identity]
 formula:      attractiveness ~ gender + alcohol + gender:alcohol
 algorithm:    sampling
 priors:       see help('prior_summary')
 sample:       4000 (posterior sample size)
 observations: 48
 predictors:   6

Estimates:
                     mean     sd       2.5%     25%      50%      75%      97.5%
(Intercept)         58.344    1.363   55.630   57.434   58.342   59.282   60.986
gender1             -1.825    1.392   -4.618   -2.729   -1.819   -0.883    0.945
alcohol1            -2.693    0.940   -4.581   -3.309   -2.694   -2.080   -0.769
alcohol2            -9.060    1.654  -12.323  -10.213   -9.079   -7.884   -5.928
gender1:alcohol1    -2.489    0.933   -4.306   -3.098   -2.494   -1.877   -0.635
gender1:alcohol2    -6.532    1.692   -9.789   -7.628   -6.551   -5.414   -3.200
sigma                9.348    1.055    7.546    8.608    9.254    9.982   11.801
mean_PPD            58.340    1.910   54.691   57.087   58.336   59.611   62.089
log-posterior     -186.473    2.066 -191.360 -187.564 -186.143 -184.946 -183.574

Diagnostics:
                 mcse  Rhat  n_eff
(Intercept)      0.022 0.999 4000
gender1          0.022 1.000 4000
alcohol1         0.015 1.000 4000
alcohol2         0.026 1.000 4000
gender1:alcohol1 0.015 1.000 4000
gender1:alcohol2 0.027 0.999 4000
sigma            0.018 1.000 3319
mean_PPD         0.030 1.000 4000
log-posterior    0.052 1.001 1608

For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of
effective sample size, and Rhat is the potential scale reduction factor on split
chains (at convergence Rhat=1).

> plot(model10)
> 
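
# rstanarm provides wrappers for the same kind of posterior predictive check and
# for interval summaries of the coefficients; a minimal sketch (the function
# choices are suggestions, not part of the original session):
pp_check(model10)                         # density overlay of y vs. replicated data
posterior_interval(model10, prob = 0.95)  # 95% posterior intervals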