Ridge regression 3

Gota Morota

February 21, 2020

Data simulation

# Fix the RNG seed so the simulated data below are reproducible.
set.seed(101)

# Problem dimensions.
n <- 10  # number of individuals (rows)
m <- 30  # number of markers (columns)

# Marker matrix: n * m draws from Binomial(size = 2, prob = 0.3), i.e. allele
# counts in {0, 1, 2}, laid out column by column as an n x m matrix.
W0 <- rbinom(n * m, size = 2, prob = 0.3)
dim(W0) <- c(n, m)

# Column-centred copy of the marker matrix (centred only, not rescaled).
W <- scale(W0, scale = FALSE)

# Response: n draws from a chi-squared distribution with 5 degrees of freedom.
y <- rchisq(n, df = 5)
y
 [1] 2.441160 2.038945 6.339306 2.942351 7.455671 1.996304 3.271530
 [8] 5.398677 3.050813 5.325263

Ridge regression

# Ridge penalty added to the diagonal of W'W.
lambda <- 0.1

# SNP effects: solve the ridge normal equations
#   (W'W + lambda * I) a = W'y
# as a linear system rather than forming the explicit inverse and multiplying;
# solve(A, b) is cheaper and numerically more stable than solve(A) %*% b.
# crossprod(W) is W'W and crossprod(W, y) is W'y; `a` is an m x 1 matrix.
a <- solve(crossprod(W) + diag(lambda, m), crossprod(W, y))

Scalar form

# Marker 1: effect taken from the joint (matrix-form) ridge solution `a`.
a[1, 1]
[1] -0.3222766
# Marker 2: effect taken from the joint (matrix-form) ridge solution `a`.
a[2, 1]
[1] 0.4282193
# Marker 1, scalar form: regress y, adjusted for the fitted contribution of
# every other marker, on column 1 of W with the same penalty lambda.
# The result is a 1 x 1 matrix that matches a[1, ].
local({
  w_j <- W[, 1]
  # y with the contribution of all markers except marker 1 removed
  y_adj <- y - W[, -1] %*% a[-1, , drop = FALSE]
  (w_j %*% y_adj) / (sum(w_j^2) + diag(lambda, 1))
})
           [,1]
[1,] -0.3222766
# Marker 2, scalar form: regress y, adjusted for the fitted contribution of
# every other marker, on column 2 of W with the same penalty lambda.
# The result is a 1 x 1 matrix that matches a[2, ].
local({
  w_j <- W[, 2]
  # y with the contribution of all markers except marker 2 removed
  y_adj <- y - W[, -2] %*% a[-2, , drop = FALSE]
  (w_j %*% y_adj) / (sum(w_j^2) + diag(lambda, 1))
})
          [,1]
[1,] 0.4282193

Marker specific shrinkage

# Marker 1: shrinkage factor w'w / (w'w + lambda), where w is column 1 of W.
# diag(lambda, 1) is a 1 x 1 matrix, so the result prints as a 1 x 1 matrix.
local({
  ss <- sum(W[, 1]^2)
  ss / (ss + diag(lambda, 1))
})
          [,1]
[1,] 0.9545455
# sum(W[,1]^2)

# Marker 2: shrinkage factor w'w / (w'w + lambda), where w is column 2 of W.
# diag(lambda, 1) is a 1 x 1 matrix, so the result prints as a 1 x 1 matrix.
local({
  ss <- sum(W[, 2]^2)
  ss / (ss + diag(lambda, 1))
})
          [,1]
[1,] 0.9615385
# sum(W[,2]^2)

# Marker 3: shrinkage factor w'w / (w'w + lambda), where w is column 3 of W.
# diag(lambda, 1) is a 1 x 1 matrix, so the result prints as a 1 x 1 matrix.
local({
  ss <- sum(W[, 3]^2)
  ss / (ss + diag(lambda, 1))
})
          [,1]
[1,] 0.9545455
# sum(W[,3]^2)

Allele frequency

# Per-marker allele frequency: the column total of the uncentred counts W0
# divided by twice the number of rows.
p <- colSums(W0) / (nrow(W0) * 2)
p[seq_len(3)]
[1] 0.15 0.25 0.35
# Minor allele frequency: the smaller of p and 1 - p for each marker, so any
# frequency above 0.5 is folded onto 1 - p.
maf <- pmin(p, 1 - p)
maf[seq_len(3)]
[1] 0.15 0.25 0.35