This example illustrates how to fit weighted least squares for single-marker GWAS.

Weighted least squares for single-marker GWAS

Data

We will use the wheat data available in the BGLR R package.

rm(list = ls())
library(BGLR)
data(wheat)
`?`(wheat)
y <- 10 + wheat.Y[, 2]  # use the second phenotype
W <- wheat.X

Genomic relationship matrix

p <- colMeans(W)
maf <- ifelse(p > 0.5, 1 - p, p)  # or pmin(p, 1-p)
maf.index <- which(maf < 0.1)
p2 <- p[-maf.index]
W2 <- W[, -maf.index]
dim(W2)

Wcs <- scale(W2, center = TRUE, scale = TRUE)
G <- tcrossprod(Wcs)/ncol(Wcs)

Varinace component estimates

We will use the rrBLUP R package.

library(rrBLUP)
`?`(mixed.solve)
fit <- mixed.solve(y = y, K = G)

fit$beta  # BLUE (intercept)

fit$Vu  # additive genetic variance

fit$Ve  # residual variance

lambda <- fit$Ve/fit$Vu  # lambda

Weighted least squares

We will estimate the marker effect 1.

Wj <- W[, 1, drop = FALSE]  # first marker 
W1j <- cbind(matrix(1, nrow = nrow(Wj)), Wj)  # add the intercept to W
colnames(W1j) <- c("Intercept", "Marker1")

ed <- eigen(G)  # eigendecomposition
ed.val <- ed$values
ed.vec <- ed$vectors
lambda <- fit$Ve/fit$Vu

ystar <- t(ed.vec) %*% matrix(y)
Wstar <- t(ed.vec) %*% W1j
Omega <- 1/(ed.val + lambda)  # weight

MWMstar <- t(Wstar) %*% diag(Omega) %*% Wstar
MWystar <- t(Wstar) %*% diag(Omega) %*% ystar

Results

solve(MWMstar) %*% MWystar  # manually computed 

fitW <- lm(ystar ~ -1 + Wstar, weights = Omega)  # use the lm() function 
summary(fitW)$coef

Two-step approach

Compare the results from weighted least squares GWAS with the two-step approach.

Z <- diag(nrow(G))
I <- diag(nrow(G))
Ginv <- solve(G)
V <- G * fit$Vu + I * fit$Ve
Vinv <- solve(V)

a <- solve(t(W1j) %*% Vinv %*% W1j) %*% t(W1j) %*% Vinv %*% y  # # the intercept and the first marker effect
g <- solve(I + Ginv * lambda) %*% (y - X %*% b)  # BLUP (genetic values)

a  # the intercept and the first marker effect
head(g)
tail(g)

Mixed model equations (MME)

Compare the results from weighted least squares GWAS with the MME.

WtW <- t(W1j) %*% W1j  # X'X
WtZ <- t(W1j) %*% Z  # X'Z
ZtW <- t(Z) %*% W1j  # Z'X
ZtZ <- t(Z) %*% Z  # Z'Z
Wty <- t(W1j) %*% y  # X'y
Zty <- t(Z) %*% y  # Z'y

LHS1 <- cbind(WtW, WtZ)
LHS2 <- cbind(ZtW, ZtZ + Ginv * lambda)
LHS <- rbind(LHS1, LHS2)  # left hand side of MME
RHS <- rbind(Wty, Zty)  # right hand side of MME

sol <- solve(LHS, RHS)  # solve for BLUE and BLUP

# intercept
sol[1, ]

# effect of marker 1
sol[2, ]

# BLUP (genetic values)
sol[-c(1, 2)]

Weighted least squares single-marker GWAS

Gota Morota

March 27, 2020