This example illustrates how to fit weighted least squares for single-marker GWAS.
Weighted least squares for single-marker GWAS
Data
We will use the wheat data available in the BGLR R package.
rm(list = ls())
library(BGLR)
data(wheat)
`?`(wheat)
y <- 10 + wheat.Y[, 2] # use the second phenotype
W <- wheat.X
Genomic relationship matrix
p <- colMeans(W)
maf <- ifelse(p > 0.5, 1 - p, p) # or pmin(p, 1-p)
maf.index <- which(maf < 0.1)
p2 <- p[-maf.index]
W2 <- W[, -maf.index]
dim(W2)
Wcs <- scale(W2, center = TRUE, scale = TRUE)
G <- tcrossprod(Wcs)/ncol(Wcs)
Varinace component estimates
We will use the rrBLUP R package.
library(rrBLUP)
`?`(mixed.solve)
fit <- mixed.solve(y = y, K = G)
fit$beta # BLUE (intercept)
fit$Vu # additive genetic variance
fit$Ve # residual variance
lambda <- fit$Ve/fit$Vu # lambda
Weighted least squares
We will estimate the marker effect 1.
Wj <- W[, 1, drop = FALSE] # first marker
W1j <- cbind(matrix(1, nrow = nrow(Wj)), Wj) # add the intercept to W
colnames(W1j) <- c("Intercept", "Marker1")
ed <- eigen(G) # eigendecomposition
ed.val <- ed$values
ed.vec <- ed$vectors
lambda <- fit$Ve/fit$Vu
ystar <- t(ed.vec) %*% matrix(y)
Wstar <- t(ed.vec) %*% W1j
Omega <- 1/(ed.val + lambda) # weight
MWMstar <- t(Wstar) %*% diag(Omega) %*% Wstar
MWystar <- t(Wstar) %*% diag(Omega) %*% ystar
Results
solve(MWMstar) %*% MWystar # manually computed
fitW <- lm(ystar ~ -1 + Wstar, weights = Omega) # use the lm() function
summary(fitW)$coef
Two-step approach
Compare the results from weighted least squares GWAS with the two-step approach.
Z <- diag(nrow(G))
I <- diag(nrow(G))
Ginv <- solve(G)
V <- G * fit$Vu + I * fit$Ve
Vinv <- solve(V)
a <- solve(t(W1j) %*% Vinv %*% W1j) %*% t(W1j) %*% Vinv %*% y # # the intercept and the first marker effect
g <- solve(I + Ginv * lambda) %*% (y - X %*% b) # BLUP (genetic values)
a # the intercept and the first marker effect
head(g)
tail(g)
Mixed model equations (MME)
Compare the results from weighted least squares GWAS with the MME.
WtW <- t(W1j) %*% W1j # X'X
WtZ <- t(W1j) %*% Z # X'Z
ZtW <- t(Z) %*% W1j # Z'X
ZtZ <- t(Z) %*% Z # Z'Z
Wty <- t(W1j) %*% y # X'y
Zty <- t(Z) %*% y # Z'y
LHS1 <- cbind(WtW, WtZ)
LHS2 <- cbind(ZtW, ZtZ + Ginv * lambda)
LHS <- rbind(LHS1, LHS2) # left hand side of MME
RHS <- rbind(Wty, Zty) # right hand side of MME
sol <- solve(LHS, RHS) # solve for BLUE and BLUP
# intercept
sol[1, ]
# effect of marker 1
sol[2, ]
# BLUP (genetic values)
sol[-c(1, 2)]