Ridge regression 1

Gota Morota

February 17, 2020

Simulated data

y <- c(10, 5, 8)
y
[1] 10  5  8
set.seed(40)
n <- 3  # individuals
m <- 5  # markers 
W <- matrix(rbinom(n = n * m, size = 2, prob = 0.5), nrow = n, ncol = m)
W
     [,1] [,2] [,3] [,4] [,5]
[1,]    1    0    0    0    0
[2,]    2    0    1    0    0
[3,]    1    1    1    1    2

Ordinary least squares

# W'W
t(W) %*% W
     [,1] [,2] [,3] [,4] [,5]
[1,]    6    1    3    1    2
[2,]    1    1    1    1    2
[3,]    3    1    2    1    2
[4,]    1    1    1    1    2
[5,]    2    2    2    2    4
# Inverse of W'W
try(solve(t(W) %*% W))
Error in solve.default(t(W) %*% W) : 
  Lapack routine dgesv: system is exactly singular: U[4,4] = 0
# SNP effects
try(a <- solve(t(W) %*% W) %*% t(W) %*% y)
Error in solve.default(t(W) %*% W) : 
  Lapack routine dgesv: system is exactly singular: U[4,4] = 0
# determinant
det(t(W) %*% W)
[1] 0

The role of \(\lambda\)

lambda1 <- 0.1
diag(lambda1, m)
     [,1] [,2] [,3] [,4] [,5]
[1,]  0.1  0.0  0.0  0.0  0.0
[2,]  0.0  0.1  0.0  0.0  0.0
[3,]  0.0  0.0  0.1  0.0  0.0
[4,]  0.0  0.0  0.0  0.1  0.0
[5,]  0.0  0.0  0.0  0.0  0.1
# W'W
t(W) %*% W
     [,1] [,2] [,3] [,4] [,5]
[1,]    6    1    3    1    2
[2,]    1    1    1    1    2
[3,]    3    1    2    1    2
[4,]    1    1    1    1    2
[5,]    2    2    2    2    4
# W'W + lambda1
t(W) %*% W + diag(lambda1, m)
     [,1] [,2] [,3] [,4] [,5]
[1,]  6.1  1.0  3.0  1.0  2.0
[2,]  1.0  1.1  1.0  1.0  2.0
[3,]  3.0  1.0  2.1  1.0  2.0
[4,]  1.0  1.0  1.0  1.1  2.0
[5,]  2.0  2.0  2.0  2.0  4.1
# Inverse of W'W + lambda1
solve(t(W) %*% W + diag(lambda1, m))
            [,1]        [,2]       [,3]        [,4]       [,5]
[1,]  0.67821930  0.08963251 -1.2249776  0.08963251  0.1792650
[2,]  0.08963251  8.39657405 -0.3087342 -1.60342595 -3.2068519
[3,] -1.22497759 -0.30873419  3.1082561 -0.30873419 -0.6174684
[4,]  0.08963251 -1.60342595 -0.3087342  8.39657405 -3.2068519
[5,]  0.17926501 -3.20685191 -0.6174684 -3.20685191  3.5862962
# SNP effects
a1 <- solve(t(W) %*% W + diag(lambda1, m)) %*% t(W) %*% y
a1
          [,1]
[1,]  7.367792
[2,]  1.531720
[3,] -8.711284
[4,]  1.531720
[5,]  3.063440
# determinant
det(t(W) %*% W + diag(lambda1, m))
[1] 0.10041

Different choice of \(\lambda\)

lambda2 <- 1
diag(lambda2, m)
     [,1] [,2] [,3] [,4] [,5]
[1,]    1    0    0    0    0
[2,]    0    1    0    0    0
[3,]    0    0    1    0    0
[4,]    0    0    0    1    0
[5,]    0    0    0    0    1
# W'W
t(W) %*% W
     [,1] [,2] [,3] [,4] [,5]
[1,]    6    1    3    1    2
[2,]    1    1    1    1    2
[3,]    3    1    2    1    2
[4,]    1    1    1    1    2
[5,]    2    2    2    2    4
# W'W + lambda2
t(W) %*% W + diag(lambda2, m)
     [,1] [,2] [,3] [,4] [,5]
[1,]    7    1    3    1    2
[2,]    1    2    1    1    2
[3,]    3    1    3    1    2
[4,]    1    1    1    2    2
[5,]    2    2    2    2    5
# Inverse of W'W + lambda2
solve(t(W) %*% W + diag(lambda2, m))
              [,1]          [,2]        [,3]          [,4]          [,5]
[1,]  2.500000e-01 -1.586033e-17 -0.25000000 -3.965082e-18 -7.930164e-18
[2,]  0.000000e+00  8.666667e-01 -0.06666667 -1.333333e-01 -2.666667e-01
[3,] -2.500000e-01 -6.666667e-02  0.71666667 -6.666667e-02 -1.333333e-01
[4,] -4.625929e-18 -1.333333e-01 -0.06666667  8.666667e-01 -2.666667e-01
[5,] -9.251859e-18 -2.666667e-01 -0.13333333 -2.666667e-01  4.666667e-01
# SNP effects
a2 <- solve(t(W) %*% W + diag(lambda2, m)) %*% t(W) %*% y
a2
           [,1]
[1,]  3.7500000
[2,]  0.7333333
[3,] -0.8833333
[4,]  0.7333333
[5,]  1.4666667
# determinant
det(t(W) %*% W + diag(lambda2, m))
[1] 60