Ridge regression

Ordinary least squares

set.seed(1)
n <- 3
m <- 5
X <- matrix(rbinom(n = n * m, size = 2, prob = 0.5), nrow = n, ncol = m)
X
     [,1] [,2] [,3] [,4] [,5]
[1,]    1    2    2    0    1
[2,]    1    0    1    0    1
[3,]    1    2    1    0    2
# determinant
det(t(X) %*% X)
[1] 0

The role of \(\lambda\)

lambda <- 0.1
diag(lambda, m)
     [,1] [,2] [,3] [,4] [,5]
[1,]  0.1  0.0  0.0  0.0  0.0
[2,]  0.0  0.1  0.0  0.0  0.0
[3,]  0.0  0.0  0.1  0.0  0.0
[4,]  0.0  0.0  0.0  0.1  0.0
[5,]  0.0  0.0  0.0  0.0  0.1
# determinant
det(t(X) %*% X + diag(lambda, m))
[1] 0.29931

Scalar form

set.seed(40)
lambda <- 1
X <- matrix(rbinom(n = n * m, size = 2, prob = 0.5), nrow = n, ncol = m)
X
     [,1] [,2] [,3] [,4] [,5]
[1,]    1    0    0    0    0
[2,]    2    0    1    0    0
[3,]    1    1    1    1    2
y <- c(10, 5, 8)
y
[1] 10  5  8
beta <- solve(t(X) %*% X + diag(lambda, m)) %*% t(X) %*% y
beta
           [,1]
[1,]  3.7500000
[2,]  0.7333333
[3,] -0.8833333
[4,]  0.7333333
[5,]  1.4666667
# marker 1
beta[1, ]
[1] 3.75
# marker 2
beta[2, ]
[1] 0.7333333
# marker 1
(X[, 1] %*% (y - X[, 2:5] %*% matrix(beta[-1, ])))/(sum(X[, 1]^2) + diag(lambda, 
    1))
     [,1]
[1,] 3.75
# marker 2
(X[, 2] %*% (y - X[, -2] %*% matrix(beta[-2, ])))/(sum(X[, 2]^2) + diag(lambda, 
    1))
          [,1]
[1,] 0.7333333

Marker specific shrinkage

# marker 1
sum(X[, 1]^2)/(sum(X[, 1]^2) + diag(lambda, 1))
          [,1]
[1,] 0.8571429
# marker 2
sum(X[, 2]^2)/(sum(X[, 2]^2) + diag(lambda, 1))
     [,1]
[1,]  0.5
# marker 3
sum(X[, 3]^2)/(sum(X[, 3]^2) + diag(lambda, 1))
          [,1]
[1,] 0.6666667

Gota Morota

February 2, 2017