options(digits = 3)
c0 <- function(TT=100,k = 10, distrib = rnorm){
k <<- k # Write it upwards - we use it later..
y <- distrib(TT)
x <- matrix(nrow = TT, ncol = k)
for (i in 1:k){ x[,i] = distrib(TT) }
lm0 <- list()
pv <- NULL
signif1 <- NULL
for (i in 1:k){
lm0 <- lm(y~x[,i])
pv[i] <- summary(lm0)$coefficients[2,4] # Extract the P.values
}
return( list(p = sum(pv<.05)/k , signif1 = any(pv<.05) ))
}
c1 <- c2 <- NULL
M <- 500 # Repeat this exercise M times
for (i in 1:M){
c1[i] <- c0(TT = 100,k = 10)$p
c2[i] <- c0(TT = 100,k = 10)$signif1
}
par(mfrow = c(1,1))
plot(c1, ylim = c(0,.9), main = "Probability for finding `important` unimportant", cex.main = 2,
ylab = "Probability")
abline(a = mean(c1), b = 0, lwd = 3, col = 4)
abline(a = sum(c2)/M, b = 0, lwd = 3, col = 2)
abline(a = 0.05, b = 0, lwd = 1, col = 4, lty = 2)
abline(a = 1-.95^k, b = 0, lwd = 1, col = 2, lty = 2) # here we need k
text(x = M/5, y = mean(c1) + 0.02, "Over all false positive..", col = 4, cex = 1.2)
text(x = M/4, y = sum(c2)/M + 0.02, "That at least one is false positive..", col = 2, cex = 1.2)
legend("topright", c("Simulated prob overall","Simulated prob at least one",
"Theoretical prob overall","Theoretical prob at least one"), bty = "n",
col = c(2,4,2,4),lty=c(1,1,2,2) )